# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar

import logging
import re
import time

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.
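
    For example (an illustrative sketch; the ``force`` parameter is
    hypothetical, not one defined by this module)::

      def CheckArguments(self):
        if not hasattr(self.op, "force"):
          self.op.force = False
        if not isinstance(self.op.force, bool):
          raise errors.OpPrereqError("Invalid 'force' parameter",
                                     errors.ECODE_INVAL)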

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.
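
    For example (sketch)::

      self.share_locks[locking.LEVEL_NODE] = 1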

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }

      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS
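
    A typical override (an illustrative sketch) recalculates node locks once
    the instance locks are held::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()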

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.
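
    For example (an illustrative sketch; it assumes self.op.instance_name was
    expanded in ExpandNames)::

      def CheckPrereq(self):
        self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
        if self.instance is None:
          raise errors.OpPrereqError("Instance '%s' not known" %
                                     self.op.instance_name,
                                     errors.ECODE_NOENT)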

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_' as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes to return, use an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.
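
    Example (an illustrative sketch for a cluster-level LU)::

      def BuildHooksEnv(self):
        env = {"OP_TARGET": self.cfg.GetClusterName()}
        mn = self.cfg.GetMasterNode()
        return env, [mn], [mn]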

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results
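
    For example (an illustrative sketch)::

      def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
        if phase == constants.HOOKS_PHASE_POST:
          feedback_fn("* Hooks have run")
        return lu_result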

    """
    # API must be kept, thus we ignore the unused-argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
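
    Intended use from a subclass (an illustrative sketch)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()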

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really are being called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
385 - Implement CheckPrereq
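
  A minimal tasklet might look like this (an illustrative sketch)::

    class _ExampleTasklet(Tasklet):
      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("Running example tasklet")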

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance
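
  For example, a bridged single-NIC instance yields, among others, keys like
  these (an illustrative sketch, values made up)::

    env = {
      "INSTANCE_NAME": "inst1.example.com",
      "INSTANCE_PRIMARY": "node1.example.com",
      "INSTANCE_NIC_COUNT": 1,
      "INSTANCE_NIC0_MODE": constants.NIC_MODE_BRIDGED,
      "INSTANCE_NIC0_BRIDGE": "xen-br0",
    }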
642 "INSTANCE_NAME": name,
643 "INSTANCE_PRIMARY": primary_node,
644 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
645 "INSTANCE_OS_TYPE": os_type,
646 "INSTANCE_STATUS": str_status,
647 "INSTANCE_MEMORY": memory,
648 "INSTANCE_VCPUS": vcpus,
649 "INSTANCE_DISK_TEMPLATE": disk_template,
650 "INSTANCE_HYPERVISOR": hypervisor_name,
654 nic_count = len(nics)
655 for idx, (ip, mac, mode, link) in enumerate(nics):
658 env["INSTANCE_NIC%d_IP" % idx] = ip
659 env["INSTANCE_NIC%d_MAC" % idx] = mac
660 env["INSTANCE_NIC%d_MODE" % idx] = mode
661 env["INSTANCE_NIC%d_LINK" % idx] = link
662 if mode == constants.NIC_MODE_BRIDGED:
663 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
667 env["INSTANCE_NIC_COUNT"] = nic_count
670 disk_count = len(disks)
671 for idx, (size, mode) in enumerate(disks):
672 env["INSTANCE_DISK%d_SIZE" % idx] = size
673 env["INSTANCE_DISK%d_MODE" % idx] = mode
677 env["INSTANCE_DISK_COUNT"] = disk_count
679 for source, kind in [(bep, "BE"), (hvp, "HV")]:
680 for key, value in source.items():
681 env["INSTANCE_%s_%s" % (kind, key)] = value


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
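
# For illustration (numbers made up): with candidate_pool_size = 3 and
# GetMasterCandidateStats reporting (mc_now, mc_should) = (2, 2), the new
# node bumps mc_should to min(2 + 1, 3) = 3; since 2 < 3, the node decides
# to promote itself.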


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
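
# Example usage of _CheckOSVariant (an illustrative sketch; "os_obj" is a
# hypothetical L{objects.OS} instance): for an OS named "debootstrap+default"
# the variant "default" must be in os_obj.supported_variants, while an OS
# without variant support passes unconditionally:
#
#   _CheckOSVariant(os_obj, "debootstrap+default")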


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _FormatTimestamp(secs):
  """Formats a Unix timestamp as UTC time.

  """
  return time.strftime("%F %T %Z", time.gmtime(secs))


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = vglist is None
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc

        test1 = file_name not in remote_cksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to other nodes
    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)

    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PVs)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if (not node == node_current):
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
    }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
    }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)

      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist): %s",
               utils.SafeEncode(str(idata)))
      if test:
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(nodeinfo, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %s from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary. this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes", code="WARNING")

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn(" - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub(' ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
      }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
      }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
1889 def Exec(self, feedback_fn):
1890 """Verify the size of cluster disks.
1893 # TODO: check child disks too
1894 # TODO: check differences in size between primary/secondary nodes
1896 for instance in self.wanted_instances:
1897 pnode = instance.primary_node
1898 if pnode not in per_node_disks:
1899 per_node_disks[pnode] = []
1900 for idx, disk in enumerate(instance.disks):
1901 per_node_disks[pnode].append((instance, idx, disk))
1904 for node, dskl in per_node_disks.items():
1905 newl = [v[2].Copy() for v in dskl]
1907 self.cfg.SetDiskID(dsk, node)
1908 result = self.rpc.call_blockdev_getsizes(node, newl)
1910 self.LogWarning("Failure in blockdev_getsizes call to node"
1911 " %s, ignoring", node)
1913 if len(result.data) != len(dskl):
1914 self.LogWarning("Invalid result from node %s, ignoring node results",
1917 for ((instance, idx, disk), size) in zip(dskl, result.data):
1919 self.LogWarning("Disk %d of instance %s did not return size"
1920 " information, ignoring", idx, instance.name)
1922 if not isinstance(size, (int, long)):
1923 self.LogWarning("Disk %d of instance %s did not return valid"
1924 " size information, ignoring", idx, instance.name)
1927 if size != disk.size:
1928 self.LogInfo("Disk %d of instance %s has mismatched size,"
1929 " correcting: recorded %d, actual %d", idx,
1930 instance.name, disk.size, size)
1932 self.cfg.Update(instance, feedback_fn)
1933 changed.append((instance.name, idx, size))
1934 if self._EnsureChildSizes(disk):
1935 self.cfg.Update(instance, feedback_fn)
1936 changed.append((instance.name, idx, disk.size))
1940 class LURenameCluster(LogicalUnit):
1941 """Rename the cluster.
1944 HPATH = "cluster-rename"
1945 HTYPE = constants.HTYPE_CLUSTER
1948 def BuildHooksEnv(self):
1953 "OP_TARGET": self.cfg.GetClusterName(),
1954 "NEW_NAME": self.op.name,
1956 mn = self.cfg.GetMasterNode()
1957 all_nodes = self.cfg.GetNodeList()
1958 return env, [mn], all_nodes
1960 def CheckPrereq(self):
1961 """Verify that the passed name is a valid one.
1964 hostname = utils.GetHostInfo(self.op.name)
1966 new_name = hostname.name
1967 self.ip = new_ip = hostname.ip
1968 old_name = self.cfg.GetClusterName()
1969 old_ip = self.cfg.GetMasterIP()
1970 if new_name == old_name and new_ip == old_ip:
1971 raise errors.OpPrereqError("Neither the name nor the IP address of the"
1972 " cluster has changed",
1974 if new_ip != old_ip:
1975 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1976 raise errors.OpPrereqError("The given cluster IP address (%s) is"
1977 " reachable on the network. Aborting." %
1978 new_ip, errors.ECODE_NOTUNIQUE)
1980 self.op.name = new_name
1982 def Exec(self, feedback_fn):
1983 """Rename the cluster.
1986 clustername = self.op.name
1989 # shutdown the master IP
1990 master = self.cfg.GetMasterNode()
1991 result = self.rpc.call_node_stop_master(master, False)
1992 result.Raise("Could not disable the master role")
1995 cluster = self.cfg.GetClusterInfo()
1996 cluster.cluster_name = clustername
1997 cluster.master_ip = ip
1998 self.cfg.Update(cluster, feedback_fn)
2000 # update the known hosts file
2001 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2002 node_list = self.cfg.GetNodeList()
2004 node_list.remove(master)
2007 result = self.rpc.call_upload_file(node_list,
2008 constants.SSH_KNOWN_HOSTS_FILE)
2009 for to_node, to_result in result.iteritems():
2010 msg = to_result.fail_msg
2012 msg = ("Copy of file %s to node %s failed: %s" %
2013 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2014 self.proc.LogWarning(msg)
2017 result = self.rpc.call_node_start_master(master, False, False)
2018 msg = result.fail_msg
2020 self.LogWarning("Could not re-enable the master role on"
2021 " the master, please restart manually: %s", msg)
2024 def _RecursiveCheckIfLVMBased(disk):
2025 """Check if the given disk or its children are lvm-based.
2027 @type disk: L{objects.Disk}
2028 @param disk: the disk to check
2030 @return: boolean indicating whether a LD_LV dev_type was found or not
2034 for chdisk in disk.children:
2035 if _RecursiveCheckIfLVMBased(chdisk):
2037 return disk.dev_type == constants.LD_LV
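
# Minimal usage sketch for _RecursiveCheckIfLVMBased, using a stand-in
# object with only the two attributes the function reads (dev_type and
# children); _FakeDisk is hypothetical test data, not a Ganeti class.
class _FakeDisk(object):
  def __init__(self, dev_type, children=None):
    self.dev_type = dev_type
    self.children = children or []

def _ExampleLvmCheck():
  # a DRBD8 device backed by two logical volumes: the recursion
  # descends into the children and reports True because they are LD_LV
  drbd = _FakeDisk(constants.LD_DRBD8,
                   children=[_FakeDisk(constants.LD_LV),
                             _FakeDisk(constants.LD_LV)])
  return _RecursiveCheckIfLVMBased(drbd)
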
2040 class LUSetClusterParams(LogicalUnit):
2041 """Change the parameters of the cluster.
2044 HPATH = "cluster-modify"
2045 HTYPE = constants.HTYPE_CLUSTER
2049 def CheckArguments(self):
2053 if not hasattr(self.op, "candidate_pool_size"):
2054 self.op.candidate_pool_size = None
2055 if self.op.candidate_pool_size is not None:
2057 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2058 except (ValueError, TypeError), err:
2059 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2060 str(err), errors.ECODE_INVAL)
2061 if self.op.candidate_pool_size < 1:
2062 raise errors.OpPrereqError("At least one master candidate needed",
2065 def ExpandNames(self):
2066 # FIXME: in the future maybe other cluster params won't require checking on
2067 # all nodes to be modified.
2068 self.needed_locks = {
2069 locking.LEVEL_NODE: locking.ALL_SET,
2071 self.share_locks[locking.LEVEL_NODE] = 1
2073 def BuildHooksEnv(self):
2078 "OP_TARGET": self.cfg.GetClusterName(),
2079 "NEW_VG_NAME": self.op.vg_name,
2081 mn = self.cfg.GetMasterNode()
2082 return env, [mn], [mn]
2084 def CheckPrereq(self):
2085 """Check prerequisites.
2087 This checks that the given parameters don't conflict and
2088 that the given volume group is valid.
2091 if self.op.vg_name is not None and not self.op.vg_name:
2092 instances = self.cfg.GetAllInstancesInfo().values()
2093 for inst in instances:
2094 for disk in inst.disks:
2095 if _RecursiveCheckIfLVMBased(disk):
2096 raise errors.OpPrereqError("Cannot disable lvm storage while"
2097 " lvm-based instances exist",
2100 node_list = self.acquired_locks[locking.LEVEL_NODE]
2102 # if vg_name is not None, check the given volume group on all nodes
2104 vglist = self.rpc.call_vg_list(node_list)
2105 for node in node_list:
2106 msg = vglist[node].fail_msg
2108 # ignoring down node
2109 self.LogWarning("Error while gathering data on node %s"
2110 " (ignoring node): %s", node, msg)
2112 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2114 constants.MIN_VG_SIZE)
2116 raise errors.OpPrereqError("Error on node '%s': %s" %
2117 (node, vgstatus), errors.ECODE_ENVIRON)
2119 self.cluster = cluster = self.cfg.GetClusterInfo()
2120 # validate params changes
2121 if self.op.beparams:
2122 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2123 self.new_beparams = objects.FillDict(
2124 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2126 if self.op.nicparams:
2127 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2128 self.new_nicparams = objects.FillDict(
2129 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2130 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2133 # check all instances for consistency
2134 for instance in self.cfg.GetAllInstancesInfo().values():
2135 for nic_idx, nic in enumerate(instance.nics):
2136 params_copy = copy.deepcopy(nic.nicparams)
2137 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2139 # check parameter syntax
2141 objects.NIC.CheckParameterSyntax(params_filled)
2142 except errors.ConfigurationError, err:
2143 nic_errors.append("Instance %s, nic/%d: %s" %
2144 (instance.name, nic_idx, err))
2146 # if we're moving instances to routed, check that they have an ip
2147 target_mode = params_filled[constants.NIC_MODE]
2148 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2149 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2150 (instance.name, nic_idx))
2152 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2153 "\n".join(nic_errors))
2155 # hypervisor list/parameters
2156 self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2157 if self.op.hvparams:
2158 if not isinstance(self.op.hvparams, dict):
2159 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2161 for hv_name, hv_dict in self.op.hvparams.items():
2162 if hv_name not in self.new_hvparams:
2163 self.new_hvparams[hv_name] = hv_dict
2165 self.new_hvparams[hv_name].update(hv_dict)
2167 # os hypervisor parameters
2168 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2170 if not isinstance(self.op.os_hvp, dict):
2171 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2173 for os_name, hvs in self.op.os_hvp.items():
2174 if not isinstance(hvs, dict):
2175 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2176 " input"), errors.ECODE_INVAL)
2177 if os_name not in self.new_os_hvp:
2178 self.new_os_hvp[os_name] = hvs
2180 for hv_name, hv_dict in hvs.items():
2181 if hv_name not in self.new_os_hvp[os_name]:
2182 self.new_os_hvp[os_name][hv_name] = hv_dict
2184 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2186 if self.op.enabled_hypervisors is not None:
2187 self.hv_list = self.op.enabled_hypervisors
2188 if not self.hv_list:
2189 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2190 " least one member",
2192 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2194 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2196 utils.CommaJoin(invalid_hvs),
2199 self.hv_list = cluster.enabled_hypervisors
2201 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2202 # either the enabled list has changed, or the parameters have, validate
2203 for hv_name, hv_params in self.new_hvparams.items():
2204 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2205 (self.op.enabled_hypervisors and
2206 hv_name in self.op.enabled_hypervisors)):
2207 # either this is a new hypervisor, or its parameters have changed
2208 hv_class = hypervisor.GetHypervisor(hv_name)
2209 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2210 hv_class.CheckParameterSyntax(hv_params)
2211 _CheckHVParams(self, node_list, hv_name, hv_params)
2214 # no need to check any newly-enabled hypervisors, since the
2215 # defaults have already been checked in the above code-block
2216 for os_name, os_hvp in self.new_os_hvp.items():
2217 for hv_name, hv_params in os_hvp.items():
2218 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2219 # we need to fill in the new os_hvp on top of the actual hv_p
2220 cluster_defaults = self.new_hvparams.get(hv_name, {})
2221 new_osp = objects.FillDict(cluster_defaults, hv_params)
2222 hv_class = hypervisor.GetHypervisor(hv_name)
2223 hv_class.CheckParameterSyntax(new_osp)
2224 _CheckHVParams(self, node_list, hv_name, new_osp)
2227 def Exec(self, feedback_fn):
2228 """Change the parameters of the cluster.
2231 if self.op.vg_name is not None:
2232 new_volume = self.op.vg_name
2235 if new_volume != self.cfg.GetVGName():
2236 self.cfg.SetVGName(new_volume)
2238 feedback_fn("Cluster LVM configuration already in desired"
2239 " state, not changing")
2240 if self.op.hvparams:
2241 self.cluster.hvparams = self.new_hvparams
2243 self.cluster.os_hvp = self.new_os_hvp
2244 if self.op.enabled_hypervisors is not None:
2245 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2246 if self.op.beparams:
2247 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2248 if self.op.nicparams:
2249 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2251 if self.op.candidate_pool_size is not None:
2252 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2253 # we need to update the pool size here, otherwise the save will fail
2254 _AdjustCandidatePool(self, [])
2256 self.cfg.Update(self.cluster, feedback_fn)
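
# Hedged sketch of the parameter layering used throughout
# LUSetClusterParams: objects.FillDict is assumed to copy the defaults
# and overlay the custom values on top, which the plain-dict version
# below reproduces.
def _ExampleFillDict(defaults, custom):
  """Return a copy of defaults updated with the custom values."""
  filled = defaults.copy()
  filled.update(custom)
  return filled

# e.g. cluster-level NIC defaults overlaid with a per-opcode change
# (hypothetical values):
#   _ExampleFillDict({"mode": "bridged", "link": "xen-br0"},
#                    {"mode": "routed"})
#   -> {"mode": "routed", "link": "xen-br0"}
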
2259 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2260 """Distribute additional files which are part of the cluster configuration.
2262 ConfigWriter takes care of distributing the config and ssconf files, but
2263 there are more files which should be distributed to all nodes. This function
2264 makes sure those are copied.
2266 @param lu: calling logical unit
2267 @param additional_nodes: list of nodes not in the config to distribute to
2270 # 1. Gather target nodes
2271 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2272 dist_nodes = lu.cfg.GetOnlineNodeList()
2273 if additional_nodes is not None:
2274 dist_nodes.extend(additional_nodes)
2275 if myself.name in dist_nodes:
2276 dist_nodes.remove(myself.name)
2278 # 2. Gather files to distribute
2279 dist_files = set([constants.ETC_HOSTS,
2280 constants.SSH_KNOWN_HOSTS_FILE,
2281 constants.RAPI_CERT_FILE,
2282 constants.RAPI_USERS_FILE,
2283 constants.HMAC_CLUSTER_KEY,
2286 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2287 for hv_name in enabled_hypervisors:
2288 hv_class = hypervisor.GetHypervisor(hv_name)
2289 dist_files.update(hv_class.GetAncillaryFiles())
2291 # 3. Perform the files upload
2292 for fname in dist_files:
2293 if os.path.exists(fname):
2294 result = lu.rpc.call_upload_file(dist_nodes, fname)
2295 for to_node, to_result in result.items():
2296 msg = to_result.fail_msg
2298 msg = ("Copy of file %s to node %s failed: %s" %
2299 (fname, to_node, msg))
2300 lu.proc.LogWarning(msg)
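
# Illustrative sketch of the distribution pattern above, with the RPC
# replaced by a hypothetical upload_fn callback: compute the target
# list (everyone online but ourselves), then push each file that
# actually exists locally.
def _ExampleDistributeFiles(my_name, online_nodes, files, upload_fn):
  dist_nodes = [name for name in online_nodes if name != my_name]
  for fname in files:
    if os.path.exists(fname):
      # nonexistent files (e.g. an unused RAPI users file) are simply
      # skipped rather than reported as errors
      upload_fn(dist_nodes, fname)
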
2303 class LURedistributeConfig(NoHooksLU):
2304 """Force the redistribution of cluster configuration.
2306 This is a very simple LU.
2312 def ExpandNames(self):
2313 self.needed_locks = {
2314 locking.LEVEL_NODE: locking.ALL_SET,
2316 self.share_locks[locking.LEVEL_NODE] = 1
2318 def CheckPrereq(self):
2319 """Check prerequisites.
2323 def Exec(self, feedback_fn):
2324 """Redistribute the configuration.
2327 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2328 _RedistributeAncillaryFiles(self)
2331 def _WaitForSync(lu, instance, oneshot=False):
2332 """Sleep and poll for an instance's disk to sync.
2335 if not instance.disks:
2339 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2341 node = instance.primary_node
2343 for dev in instance.disks:
2344 lu.cfg.SetDiskID(dev, node)
2346 # TODO: Convert to utils.Retry
2349 degr_retries = 10 # in seconds, as we sleep 1 second each time
2353 cumul_degraded = False
2354 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2355 msg = rstats.fail_msg
2357 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2360 raise errors.RemoteError("Can't contact node %s for mirror data,"
2361 " aborting." % node)
2364 rstats = rstats.payload
2366 for i, mstat in enumerate(rstats):
2368 lu.LogWarning("Can't compute data for node %s/%s",
2369 node, instance.disks[i].iv_name)
2372 cumul_degraded = (cumul_degraded or
2373 (mstat.is_degraded and mstat.sync_percent is None))
2374 if mstat.sync_percent is not None:
2376 if mstat.estimated_time is not None:
2377 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2378 max_time = mstat.estimated_time
2380 rem_time = "no time estimate"
2381 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2382 (instance.disks[i].iv_name, mstat.sync_percent,
2385 # if we're done but degraded, let's do a few small retries, to
2386 # make sure we see a stable and not a transient situation; therefore
2387 # we force a restart of the loop
2388 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2389 logging.info("Degraded disks found, %d retries left", degr_retries)
2397 time.sleep(min(60, max_time))
2400 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2401 return not cumul_degraded
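
# A minimal sketch of the retry discipline in _WaitForSync, assuming a
# poll_fn that returns a (done, degraded) pair: a sync that looks
# finished but still degraded is polled a few more times, so that only
# a stable degraded state is reported to the caller.
def _ExampleWaitStable(poll_fn, degr_retries=10):
  while True:
    done, degraded = poll_fn()
    if done and degraded and degr_retries > 0:
      degr_retries -= 1
      time.sleep(1)
      continue
    if done:
      return not degraded
    time.sleep(1)
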
2404 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2405 """Check that mirrors are not degraded.
2407 The ldisk parameter, if True, will change the test from the
2408 is_degraded attribute (which represents overall non-ok status for
2409 the device(s)) to the ldisk_status one (representing the local storage status).
2412 lu.cfg.SetDiskID(dev, node)
2416 if on_primary or dev.AssembleOnSecondary():
2417 rstats = lu.rpc.call_blockdev_find(node, dev)
2418 msg = rstats.fail_msg
2420 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2422 elif not rstats.payload:
2423 lu.LogWarning("Can't find disk on node %s", node)
2427 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2429 result = result and not rstats.payload.is_degraded
2432 for child in dev.children:
2433 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2438 class LUDiagnoseOS(NoHooksLU):
2439 """Logical unit for OS diagnose/query.
2442 _OP_REQP = ["output_fields", "names"]
2444 _FIELDS_STATIC = utils.FieldSet()
2445 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2446 # Fields that need calculation of global os validity
2447 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2449 def ExpandNames(self):
2451 raise errors.OpPrereqError("Selective OS query not supported",
2454 _CheckOutputFields(static=self._FIELDS_STATIC,
2455 dynamic=self._FIELDS_DYNAMIC,
2456 selected=self.op.output_fields)
2458 # Lock all nodes, in shared mode
2459 # Temporary removal of locks, should be reverted later
2460 # TODO: reintroduce locks when they are lighter-weight
2461 self.needed_locks = {}
2462 #self.share_locks[locking.LEVEL_NODE] = 1
2463 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2465 def CheckPrereq(self):
2466 """Check prerequisites.
2471 def _DiagnoseByOS(rlist):
2472 """Remaps a per-node return list into an a per-os per-node dictionary
2474 @param rlist: a map with node names as keys and OS objects as values
2477 @return: a dictionary with osnames as keys and as value another map, with
2478 nodes as keys and tuples of (path, status, diagnose, variants) as values, eg::
2480 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2481 (/srv/..., False, "invalid api")],
2482 "node2": [(/srv/..., True, "")]}
2487 # we build here the list of nodes that didn't fail the RPC (at RPC
2488 # level), so that nodes with a non-responding node daemon don't
2489 # make all OSes invalid
2490 good_nodes = [node_name for node_name in rlist
2491 if not rlist[node_name].fail_msg]
2492 for node_name, nr in rlist.items():
2493 if nr.fail_msg or not nr.payload:
2495 for name, path, status, diagnose, variants in nr.payload:
2496 if name not in all_os:
2497 # build a list of nodes for this os containing empty lists
2498 # for each node in node_list
2500 for nname in good_nodes:
2501 all_os[name][nname] = []
2502 all_os[name][node_name].append((path, status, diagnose, variants))
2505 def Exec(self, feedback_fn):
2506 """Compute the list of OSes.
2509 valid_nodes = self.cfg.GetOnlineNodeList()
2510 node_data = self.rpc.call_os_diagnose(valid_nodes)
2511 pol = self._DiagnoseByOS(node_data)
2513 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2514 calc_variants = "variants" in self.op.output_fields
2516 for os_name, os_data in pol.items():
2521 for osl in os_data.values():
2522 valid = valid and osl and osl[0][1]
2527 node_variants = osl[0][3]
2528 if variants is None:
2529 variants = node_variants
2531 variants = [v for v in variants if v in node_variants]
2533 for field in self.op.output_fields:
2536 elif field == "valid":
2538 elif field == "node_status":
2539 # this is just a copy of the dict
2541 for node_name, nos_list in os_data.items():
2542 val[node_name] = nos_list
2543 elif field == "variants":
2546 raise errors.ParameterError(field)
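
# Sketch of the per-node to per-OS remap performed by
# LUDiagnoseOS._DiagnoseByOS, on hypothetical payload data shaped like
# the (name, path, status, diagnose, variants) tuples it receives.
def _ExampleDiagnoseRemap():
  rlist = {
    "node1": [("debian-etch", "/srv/os", True, "", [])],
    "node2": [("debian-etch", "/srv/os", False, "invalid api", [])],
    }
  all_os = {}
  for node_name, payload in rlist.items():
    for name, path, status, diagnose, variants in payload:
      if name not in all_os:
        all_os[name] = dict((nname, []) for nname in rlist)
      all_os[name][node_name].append((path, status, diagnose, variants))
  # all_os["debian-etch"] now holds one entry per node, so a later pass
  # can compute global validity and the intersection of variants
  return all_os
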
2553 class LURemoveNode(LogicalUnit):
2554 """Logical unit for removing a node.
2557 HPATH = "node-remove"
2558 HTYPE = constants.HTYPE_NODE
2559 _OP_REQP = ["node_name"]
2561 def BuildHooksEnv(self):
2564 This doesn't run on the target node in the pre phase as a failed
2565 node would then be impossible to remove.
2569 "OP_TARGET": self.op.node_name,
2570 "NODE_NAME": self.op.node_name,
2572 all_nodes = self.cfg.GetNodeList()
2574 all_nodes.remove(self.op.node_name)
2576 logging.warning("Node %s which is about to be removed not found"
2577 " in the all nodes list", self.op.node_name)
2578 return env, all_nodes, all_nodes
2580 def CheckPrereq(self):
2581 """Check prerequisites.
2584 - the node exists in the configuration
2585 - it does not have primary or secondary instances
2586 - it's not the master
2588 Any errors are signaled by raising errors.OpPrereqError.
2591 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2592 node = self.cfg.GetNodeInfo(self.op.node_name)
2593 assert node is not None
2595 instance_list = self.cfg.GetInstanceList()
2597 masternode = self.cfg.GetMasterNode()
2598 if node.name == masternode:
2599 raise errors.OpPrereqError("Node is the master node,"
2600 " you need to fail over first.",
2603 for instance_name in instance_list:
2604 instance = self.cfg.GetInstanceInfo(instance_name)
2605 if node.name in instance.all_nodes:
2606 raise errors.OpPrereqError("Instance %s is still running on the node,"
2607 " please remove it first." % instance_name,
2609 self.op.node_name = node.name
2612 def Exec(self, feedback_fn):
2613 """Removes the node from the cluster.
2617 logging.info("Stopping the node daemon and removing configs from node %s",
2620 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2622 # Promote nodes to master candidate as needed
2623 _AdjustCandidatePool(self, exceptions=[node.name])
2624 self.context.RemoveNode(node.name)
2626 # Run post hooks on the node before it's removed
2627 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2629 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2631 # pylint: disable-msg=W0702
2632 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2634 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2635 msg = result.fail_msg
2637 self.LogWarning("Errors encountered on the remote node while leaving"
2638 " the cluster: %s", msg)
2641 class LUQueryNodes(NoHooksLU):
2642 """Logical unit for querying nodes.
2645 # pylint: disable-msg=W0142
2646 _OP_REQP = ["output_fields", "names", "use_locking"]
2649 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2650 "master_candidate", "offline", "drained"]
2652 _FIELDS_DYNAMIC = utils.FieldSet(
2654 "mtotal", "mnode", "mfree",
2656 "ctotal", "cnodes", "csockets",
2659 _FIELDS_STATIC = utils.FieldSet(*[
2660 "pinst_cnt", "sinst_cnt",
2661 "pinst_list", "sinst_list",
2662 "pip", "sip", "tags",
2664 "role"] + _SIMPLE_FIELDS
2667 def ExpandNames(self):
2668 _CheckOutputFields(static=self._FIELDS_STATIC,
2669 dynamic=self._FIELDS_DYNAMIC,
2670 selected=self.op.output_fields)
2672 self.needed_locks = {}
2673 self.share_locks[locking.LEVEL_NODE] = 1
2676 self.wanted = _GetWantedNodes(self, self.op.names)
2678 self.wanted = locking.ALL_SET
2680 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2681 self.do_locking = self.do_node_query and self.op.use_locking
2683 # if we don't request only static fields, we need to lock the nodes
2684 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2686 def CheckPrereq(self):
2687 """Check prerequisites.
2690 # The validation of the node list is done in _GetWantedNodes if the
2691 # list is non-empty; if it's empty, there is no validation to do
2694 def Exec(self, feedback_fn):
2695 """Computes the list of nodes and their attributes.
2698 all_info = self.cfg.GetAllNodesInfo()
2700 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2701 elif self.wanted != locking.ALL_SET:
2702 nodenames = self.wanted
2703 missing = set(nodenames).difference(all_info.keys())
2705 raise errors.OpExecError(
2706 "Some nodes were removed before retrieving their data: %s" % missing)
2708 nodenames = all_info.keys()
2710 nodenames = utils.NiceSort(nodenames)
2711 nodelist = [all_info[name] for name in nodenames]
2713 # begin data gathering
2715 if self.do_node_query:
2717 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2718 self.cfg.GetHypervisorType())
2719 for name in nodenames:
2720 nodeinfo = node_data[name]
2721 if not nodeinfo.fail_msg and nodeinfo.payload:
2722 nodeinfo = nodeinfo.payload
2723 fn = utils.TryConvert
2725 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2726 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2727 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2728 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2729 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2730 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2731 "bootid": nodeinfo.get('bootid', None),
2732 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2733 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2736 live_data[name] = {}
2738 live_data = dict.fromkeys(nodenames, {})
2740 node_to_primary = dict([(name, set()) for name in nodenames])
2741 node_to_secondary = dict([(name, set()) for name in nodenames])
2743 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2744 "sinst_cnt", "sinst_list"))
2745 if inst_fields & frozenset(self.op.output_fields):
2746 inst_data = self.cfg.GetAllInstancesInfo()
2748 for inst in inst_data.values():
2749 if inst.primary_node in node_to_primary:
2750 node_to_primary[inst.primary_node].add(inst.name)
2751 for secnode in inst.secondary_nodes:
2752 if secnode in node_to_secondary:
2753 node_to_secondary[secnode].add(inst.name)
2755 master_node = self.cfg.GetMasterNode()
2757 # end data gathering
2760 for node in nodelist:
2762 for field in self.op.output_fields:
2763 if field in self._SIMPLE_FIELDS:
2764 val = getattr(node, field)
2765 elif field == "pinst_list":
2766 val = list(node_to_primary[node.name])
2767 elif field == "sinst_list":
2768 val = list(node_to_secondary[node.name])
2769 elif field == "pinst_cnt":
2770 val = len(node_to_primary[node.name])
2771 elif field == "sinst_cnt":
2772 val = len(node_to_secondary[node.name])
2773 elif field == "pip":
2774 val = node.primary_ip
2775 elif field == "sip":
2776 val = node.secondary_ip
2777 elif field == "tags":
2778 val = list(node.GetTags())
2779 elif field == "master":
2780 val = node.name == master_node
2781 elif self._FIELDS_DYNAMIC.Matches(field):
2782 val = live_data[node.name].get(field, None)
2783 elif field == "role":
2784 if node.name == master_node:
2786 elif node.master_candidate:
2795 raise errors.ParameterError(field)
2796 node_output.append(val)
2797 output.append(node_output)
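
# Sketch of the instance-to-node reverse maps built in
# LUQueryNodes.Exec, using plain tuples instead of instance objects;
# the instance data passed in is hypothetical.
def _ExampleNodeInstanceMaps(nodenames, instances):
  """instances: iterable of (name, primary_node, secondary_nodes)."""
  node_to_primary = dict((name, set()) for name in nodenames)
  node_to_secondary = dict((name, set()) for name in nodenames)
  for iname, pnode, snodes in instances:
    if pnode in node_to_primary:
      node_to_primary[pnode].add(iname)
    for snode in snodes:
      if snode in node_to_secondary:
        node_to_secondary[snode].add(iname)
  return node_to_primary, node_to_secondary
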
2802 class LUQueryNodeVolumes(NoHooksLU):
2803 """Logical unit for getting volumes on node(s).
2806 _OP_REQP = ["nodes", "output_fields"]
2808 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2809 _FIELDS_STATIC = utils.FieldSet("node")
2811 def ExpandNames(self):
2812 _CheckOutputFields(static=self._FIELDS_STATIC,
2813 dynamic=self._FIELDS_DYNAMIC,
2814 selected=self.op.output_fields)
2816 self.needed_locks = {}
2817 self.share_locks[locking.LEVEL_NODE] = 1
2818 if not self.op.nodes:
2819 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2821 self.needed_locks[locking.LEVEL_NODE] = \
2822 _GetWantedNodes(self, self.op.nodes)
2824 def CheckPrereq(self):
2825 """Check prerequisites.
2827 This checks that the requested fields are valid output fields.
2830 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2832 def Exec(self, feedback_fn):
2833 """Computes the list of nodes and their attributes.
2836 nodenames = self.nodes
2837 volumes = self.rpc.call_node_volumes(nodenames)
2839 ilist = [self.cfg.GetInstanceInfo(iname) for iname
2840 in self.cfg.GetInstanceList()]
2842 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2845 for node in nodenames:
2846 nresult = volumes[node]
2849 msg = nresult.fail_msg
2851 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2854 node_vols = nresult.payload[:]
2855 node_vols.sort(key=lambda vol: vol['dev'])
2857 for vol in node_vols:
2859 for field in self.op.output_fields:
2862 elif field == "phys":
2866 elif field == "name":
2868 elif field == "size":
2869 val = int(float(vol['size']))
2870 elif field == "instance":
2872 if node not in lv_by_node[inst]:
2874 if vol['name'] in lv_by_node[inst][node]:
2880 raise errors.ParameterError(field)
2881 node_output.append(str(val))
2883 output.append(node_output)
2888 class LUQueryNodeStorage(NoHooksLU):
2889 """Logical unit for getting information on storage units on node(s).
2892 _OP_REQP = ["nodes", "storage_type", "output_fields"]
2894 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2896 def ExpandNames(self):
2897 storage_type = self.op.storage_type
2899 if storage_type not in constants.VALID_STORAGE_TYPES:
2900 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2903 _CheckOutputFields(static=self._FIELDS_STATIC,
2904 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2905 selected=self.op.output_fields)
2907 self.needed_locks = {}
2908 self.share_locks[locking.LEVEL_NODE] = 1
2911 self.needed_locks[locking.LEVEL_NODE] = \
2912 _GetWantedNodes(self, self.op.nodes)
2914 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2916 def CheckPrereq(self):
2917 """Check prerequisites.
2919 This checks that the requested fields are valid output fields.
2922 self.op.name = getattr(self.op, "name", None)
2924 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2926 def Exec(self, feedback_fn):
2927 """Computes the list of nodes and their attributes.
2930 # Always get name to sort by
2931 if constants.SF_NAME in self.op.output_fields:
2932 fields = self.op.output_fields[:]
2934 fields = [constants.SF_NAME] + self.op.output_fields
2936 # Never ask for node or type as it's only known to the LU
2937 for extra in [constants.SF_NODE, constants.SF_TYPE]:
2938 while extra in fields:
2939 fields.remove(extra)
2941 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2942 name_idx = field_idx[constants.SF_NAME]
2944 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2945 data = self.rpc.call_storage_list(self.nodes,
2946 self.op.storage_type, st_args,
2947 self.op.name, fields)
2951 for node in utils.NiceSort(self.nodes):
2952 nresult = data[node]
2956 msg = nresult.fail_msg
2958 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2961 rows = dict([(row[name_idx], row) for row in nresult.payload])
2963 for name in utils.NiceSort(rows.keys()):
2968 for field in self.op.output_fields:
2969 if field == constants.SF_NODE:
2971 elif field == constants.SF_TYPE:
2972 val = self.op.storage_type
2973 elif field in field_idx:
2974 val = row[field_idx[field]]
2976 raise errors.ParameterError(field)
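
# Sketch of the column bookkeeping in LUQueryNodeStorage.Exec: the RPC
# returns rows ordered like the requested fields, so a name-to-index
# map addresses columns by field name.  The fields and row below are
# hypothetical example data.
def _ExampleFieldIndex():
  fields = ["name", "size", "allocatable"]
  field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
  row = ["xenvg/disk0", 10240, True]
  return row[field_idx["size"]]
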
2985 class LUModifyNodeStorage(NoHooksLU):
2986 """Logical unit for modifying a storage volume on a node.
2989 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2992 def CheckArguments(self):
2993 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
2995 storage_type = self.op.storage_type
2996 if storage_type not in constants.VALID_STORAGE_TYPES:
2997 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
3000 def ExpandNames(self):
3001 self.needed_locks = {
3002 locking.LEVEL_NODE: self.op.node_name,
3005 def CheckPrereq(self):
3006 """Check prerequisites.
3009 storage_type = self.op.storage_type
3012 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3014 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3015 " modified" % storage_type,
3018 diff = set(self.op.changes.keys()) - modifiable
3020 raise errors.OpPrereqError("The following fields can not be modified for"
3021 " storage units of type '%s': %r" %
3022 (storage_type, list(diff)),
3025 def Exec(self, feedback_fn):
3026 """Computes the list of nodes and their attributes.
3029 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3030 result = self.rpc.call_storage_modify(self.op.node_name,
3031 self.op.storage_type, st_args,
3032 self.op.name, self.op.changes)
3033 result.Raise("Failed to modify storage unit '%s' on %s" %
3034 (self.op.name, self.op.node_name))
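
# Sketch of the changed-fields whitelist check in
# LUModifyNodeStorage.CheckPrereq: a set difference against the
# modifiable fields yields exactly the offending keys.  The field
# names below are hypothetical.
def _ExampleModifiableDiff():
  modifiable = frozenset(["allocatable"])
  changes = {"allocatable": False, "size": 2048}
  diff = set(changes.keys()) - modifiable
  # a non-empty diff (here: set(["size"])) would be rejected
  return diff
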
3037 class LUAddNode(LogicalUnit):
3038 """Logical unit for adding node to the cluster.
3042 HTYPE = constants.HTYPE_NODE
3043 _OP_REQP = ["node_name"]
3045 def CheckArguments(self):
3046 # validate/normalize the node name
3047 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3049 def BuildHooksEnv(self):
3052 This will run on all nodes before, and on all nodes + the new node after.
3056 "OP_TARGET": self.op.node_name,
3057 "NODE_NAME": self.op.node_name,
3058 "NODE_PIP": self.op.primary_ip,
3059 "NODE_SIP": self.op.secondary_ip,
3061 nodes_0 = self.cfg.GetNodeList()
3062 nodes_1 = nodes_0 + [self.op.node_name, ]
3063 return env, nodes_0, nodes_1
3065 def CheckPrereq(self):
3066 """Check prerequisites.
3069 - the new node is not already in the config
3071 - its parameters (single/dual homed) match the cluster
3073 Any errors are signaled by raising errors.OpPrereqError.
3076 node_name = self.op.node_name
3079 dns_data = utils.GetHostInfo(node_name)
3081 node = dns_data.name
3082 primary_ip = self.op.primary_ip = dns_data.ip
3083 secondary_ip = getattr(self.op, "secondary_ip", None)
3084 if secondary_ip is None:
3085 secondary_ip = primary_ip
3086 if not utils.IsValidIP(secondary_ip):
3087 raise errors.OpPrereqError("Invalid secondary IP given",
3089 self.op.secondary_ip = secondary_ip
3091 node_list = cfg.GetNodeList()
3092 if not self.op.readd and node in node_list:
3093 raise errors.OpPrereqError("Node %s is already in the configuration" %
3094 node, errors.ECODE_EXISTS)
3095 elif self.op.readd and node not in node_list:
3096 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3099 for existing_node_name in node_list:
3100 existing_node = cfg.GetNodeInfo(existing_node_name)
3102 if self.op.readd and node == existing_node_name:
3103 if (existing_node.primary_ip != primary_ip or
3104 existing_node.secondary_ip != secondary_ip):
3105 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3106 " address configuration as before",
3110 if (existing_node.primary_ip == primary_ip or
3111 existing_node.secondary_ip == primary_ip or
3112 existing_node.primary_ip == secondary_ip or
3113 existing_node.secondary_ip == secondary_ip):
3114 raise errors.OpPrereqError("New node ip address(es) conflict with"
3115 " existing node %s" % existing_node.name,
3116 errors.ECODE_NOTUNIQUE)
3118 # check that the type of the node (single versus dual homed) is the
3119 # same as for the master
3120 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3121 master_singlehomed = myself.secondary_ip == myself.primary_ip
3122 newbie_singlehomed = secondary_ip == primary_ip
3123 if master_singlehomed != newbie_singlehomed:
3124 if master_singlehomed:
3125 raise errors.OpPrereqError("The master has no private ip but the"
3126 " new node has one",
3129 raise errors.OpPrereqError("The master has a private ip but the"
3130 " new node doesn't have one",
3133 # checks reachability
3134 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3135 raise errors.OpPrereqError("Node not reachable by ping",
3136 errors.ECODE_ENVIRON)
3138 if not newbie_singlehomed:
3139 # check reachability from my secondary ip to newbie's secondary ip
3140 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3141 source=myself.secondary_ip):
3142 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3143 " based ping to noded port",
3144 errors.ECODE_ENVIRON)
3151 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3154 self.new_node = self.cfg.GetNodeInfo(node)
3155 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3157 self.new_node = objects.Node(name=node,
3158 primary_ip=primary_ip,
3159 secondary_ip=secondary_ip,
3160 master_candidate=self.master_candidate,
3161 offline=False, drained=False)
3163 def Exec(self, feedback_fn):
3164 """Adds the new node to the cluster.
3167 new_node = self.new_node
3168 node = new_node.name
3170 # for re-adds, reset the offline/drained/master-candidate flags;
3171 # we need to reset here, otherwise offline would prevent RPC calls
3172 # later in the procedure; this also means that if the re-add
3173 # fails, we are left with a non-offlined, broken node
3175 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3176 self.LogInfo("Readding a node, the offline/drained flags were reset")
3177 # if we demote the node, we do cleanup later in the procedure
3178 new_node.master_candidate = self.master_candidate
3180 # notify the user about any possible mc promotion
3181 if new_node.master_candidate:
3182 self.LogInfo("Node will be a master candidate")
3184 # check connectivity
3185 result = self.rpc.call_version([node])[node]
3186 result.Raise("Can't get version information from node %s" % node)
3187 if constants.PROTOCOL_VERSION == result.payload:
3188 logging.info("Communication to node %s fine, sw version %s match",
3189 node, result.payload)
3191 raise errors.OpExecError("Version mismatch: master version %s,"
3192 " node version %s" %
3193 (constants.PROTOCOL_VERSION, result.payload))
3196 if self.cfg.GetClusterInfo().modify_ssh_setup:
3197 logging.info("Copy ssh key to node %s", node)
3198 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3200 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3201 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3205 keyarray.append(utils.ReadFile(i))
3207 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3208 keyarray[2], keyarray[3], keyarray[4],
3210 result.Raise("Cannot transfer ssh keys to the new node")
3212 # Add node to our /etc/hosts, and add key to known_hosts
3213 if self.cfg.GetClusterInfo().modify_etc_hosts:
3214 utils.AddHostToEtcHosts(new_node.name)
3216 if new_node.secondary_ip != new_node.primary_ip:
3217 result = self.rpc.call_node_has_ip_address(new_node.name,
3218 new_node.secondary_ip)
3219 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3220 prereq=True, ecode=errors.ECODE_ENVIRON)
3221 if not result.payload:
3222 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3223 " you gave (%s). Please fix and re-run this"
3224 " command." % new_node.secondary_ip)
3226 node_verify_list = [self.cfg.GetMasterNode()]
3227 node_verify_param = {
3228 constants.NV_NODELIST: [node],
3229 # TODO: do a node-net-test as well?
3232 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3233 self.cfg.GetClusterName())
3234 for verifier in node_verify_list:
3235 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3236 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3238 for failed in nl_payload:
3239 feedback_fn("ssh/hostname verification failed"
3240 " (checking from %s): %s" %
3241 (verifier, nl_payload[failed]))
3242 raise errors.OpExecError("ssh/hostname verification failed.")
3245 _RedistributeAncillaryFiles(self)
3246 self.context.ReaddNode(new_node)
3247 # make sure we redistribute the config
3248 self.cfg.Update(new_node, feedback_fn)
3249 # and make sure the new node will not have old files around
3250 if not new_node.master_candidate:
3251 result = self.rpc.call_node_demote_from_mc(new_node.name)
3252 msg = result.fail_msg
3254 self.LogWarning("Node failed to demote itself from master"
3255 " candidate status: %s" % msg)
3257 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3258 self.context.AddNode(new_node, self.proc.GetECId())
3261 class LUSetNodeParams(LogicalUnit):
3262 """Modifies the parameters of a node.
3265 HPATH = "node-modify"
3266 HTYPE = constants.HTYPE_NODE
3267 _OP_REQP = ["node_name"]
3270 def CheckArguments(self):
3271 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3272 _CheckBooleanOpField(self.op, 'master_candidate')
3273 _CheckBooleanOpField(self.op, 'offline')
3274 _CheckBooleanOpField(self.op, 'drained')
3275 _CheckBooleanOpField(self.op, 'auto_promote')
3276 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3277 if all_mods.count(None) == 3:
3278 raise errors.OpPrereqError("Please pass at least one modification",
3280 if all_mods.count(True) > 1:
3281 raise errors.OpPrereqError("Can't set the node into more than one"
3282 " state at the same time",
3285 # Boolean values that tell us whether we're offlining or draining
# the node, and whether we're reverting such a state
3286 self.offline_or_drain = (self.op.offline == True or
3287 self.op.drained == True)
3288 self.deoffline_or_drain = (self.op.offline == False or
3289 self.op.drained == False)
3290 self.might_demote = (self.op.master_candidate == False or
3291 self.offline_or_drain)
3293 self.lock_all = self.op.auto_promote and self.might_demote
3296 def ExpandNames(self):
3298 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3300 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3302 def BuildHooksEnv(self):
3305 This runs on the master node.
3309 "OP_TARGET": self.op.node_name,
3310 "MASTER_CANDIDATE": str(self.op.master_candidate),
3311 "OFFLINE": str(self.op.offline),
3312 "DRAINED": str(self.op.drained),
3314 nl = [self.cfg.GetMasterNode(),
3318 def CheckPrereq(self):
3319 """Check prerequisites.
3321 This only checks the instance list against the existing names.
3324 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3326 if (self.op.master_candidate is not None or
3327 self.op.drained is not None or
3328 self.op.offline is not None):
3329 # we can't change the master's node flags
3330 if self.op.node_name == self.cfg.GetMasterNode():
3331 raise errors.OpPrereqError("The master role can be changed"
3332 " only via masterfailover",
3336 if node.master_candidate and self.might_demote and not self.lock_all:
3337 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3338 # check if after removing the current node, we're missing master
3339 # candidates
3340 (mc_remaining, mc_should, _) = \
3341 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3342 if mc_remaining != mc_should:
3343 raise errors.OpPrereqError("Not enough master candidates, please"
3344 " pass auto_promote to allow promotion",
3347 if (self.op.master_candidate == True and
3348 ((node.offline and not self.op.offline == False) or
3349 (node.drained and not self.op.drained == False))):
3350 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3351 " to master_candidate" % node.name,
3354 # If we're being de-offlined or de-drained, we'll promote ourselves
# to master candidate if needed
3355 if (self.deoffline_or_drain and not self.offline_or_drain and not
3356 self.op.master_candidate == True and not node.master_candidate):
3357 self.op.master_candidate = _DecideSelfPromotion(self)
3358 if self.op.master_candidate:
3359 self.LogInfo("Autopromoting node to master candidate")
3363 def Exec(self, feedback_fn):
3372 if self.op.offline is not None:
3373 node.offline = self.op.offline
3374 result.append(("offline", str(self.op.offline)))
3375 if self.op.offline == True:
3376 if node.master_candidate:
3377 node.master_candidate = False
3379 result.append(("master_candidate", "auto-demotion due to offline"))
3381 node.drained = False
3382 result.append(("drained", "clear drained status due to offline"))
3384 if self.op.master_candidate is not None:
3385 node.master_candidate = self.op.master_candidate
3387 result.append(("master_candidate", str(self.op.master_candidate)))
3388 if self.op.master_candidate == False:
3389 rrc = self.rpc.call_node_demote_from_mc(node.name)
3392 self.LogWarning("Node failed to demote itself: %s" % msg)
3394 if self.op.drained is not None:
3395 node.drained = self.op.drained
3396 result.append(("drained", str(self.op.drained)))
3397 if self.op.drained == True:
3398 if node.master_candidate:
3399 node.master_candidate = False
3401 result.append(("master_candidate", "auto-demotion due to drain"))
3402 rrc = self.rpc.call_node_demote_from_mc(node.name)
3405 self.LogWarning("Node failed to demote itself: %s" % msg)
3407 node.offline = False
3408 result.append(("offline", "clear offline status due to drain"))
3410 # we locked all nodes, we adjust the CP before updating this node
3412 _AdjustCandidatePool(self, [node.name])
3414 # this will trigger configuration file update, if needed
3415 self.cfg.Update(node, feedback_fn)
3417 # this will trigger job queue propagation or cleanup
3419 self.context.ReaddNode(node)
3424 class LUPowercycleNode(NoHooksLU):
3425 """Powercycles a node.
3428 _OP_REQP = ["node_name", "force"]
3431 def CheckArguments(self):
3432 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3433 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3434 raise errors.OpPrereqError("The node is the master and the force"
3435 " parameter was not set",
3438 def ExpandNames(self):
3439 """Locking for PowercycleNode.
3441 This is a last-resort option and shouldn't block on other
3442 jobs. Therefore, we grab no locks.
3445 self.needed_locks = {}
3447 def CheckPrereq(self):
3448 """Check prerequisites.
3450 This LU has no prereqs.
3455 def Exec(self, feedback_fn):
3459 result = self.rpc.call_node_powercycle(self.op.node_name,
3460 self.cfg.GetHypervisorType())
3461 result.Raise("Failed to schedule the reboot")
3462 return result.payload
3465 class LUQueryClusterInfo(NoHooksLU):
3466 """Query cluster configuration.
3472 def ExpandNames(self):
3473 self.needed_locks = {}
3475 def CheckPrereq(self):
3476 """No prerequsites needed for this LU.
3481 def Exec(self, feedback_fn):
3482 """Return cluster config.
3485 cluster = self.cfg.GetClusterInfo()
3488 # Filter just for enabled hypervisors
3489 for os_name, hv_dict in cluster.os_hvp.items():
3490 os_hvp[os_name] = {}
3491 for hv_name, hv_params in hv_dict.items():
3492 if hv_name in cluster.enabled_hypervisors:
3493 os_hvp[os_name][hv_name] = hv_params
3496 "software_version": constants.RELEASE_VERSION,
3497 "protocol_version": constants.PROTOCOL_VERSION,
3498 "config_version": constants.CONFIG_VERSION,
3499 "os_api_version": max(constants.OS_API_VERSIONS),
3500 "export_version": constants.EXPORT_VERSION,
3501 "architecture": (platform.architecture()[0], platform.machine()),
3502 "name": cluster.cluster_name,
3503 "master": cluster.master_node,
3504 "default_hypervisor": cluster.enabled_hypervisors[0],
3505 "enabled_hypervisors": cluster.enabled_hypervisors,
3506 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3507 for hypervisor_name in cluster.enabled_hypervisors]),
3509 "beparams": cluster.beparams,
3510 "nicparams": cluster.nicparams,
3511 "candidate_pool_size": cluster.candidate_pool_size,
3512 "master_netdev": cluster.master_netdev,
3513 "volume_group_name": cluster.volume_group_name,
3514 "file_storage_dir": cluster.file_storage_dir,
3515 "ctime": cluster.ctime,
3516 "mtime": cluster.mtime,
3517 "uuid": cluster.uuid,
3518 "tags": list(cluster.GetTags()),
3524 class LUQueryConfigValues(NoHooksLU):
3525 """Return configuration values.
3530 _FIELDS_DYNAMIC = utils.FieldSet()
3531 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3534 def ExpandNames(self):
3535 self.needed_locks = {}
3537 _CheckOutputFields(static=self._FIELDS_STATIC,
3538 dynamic=self._FIELDS_DYNAMIC,
3539 selected=self.op.output_fields)
3541 def CheckPrereq(self):
3542 """No prerequisites.
3547 def Exec(self, feedback_fn):
3548 """Dump a representation of the cluster config to the standard output.
3552 for field in self.op.output_fields:
3553 if field == "cluster_name":
3554 entry = self.cfg.GetClusterName()
3555 elif field == "master_node":
3556 entry = self.cfg.GetMasterNode()
3557 elif field == "drain_flag":
3558 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3559 elif field == "watcher_pause":
3560 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3562 raise errors.ParameterError(field)
3563 values.append(entry)
3567 class LUActivateInstanceDisks(NoHooksLU):
3568 """Bring up an instance's disks.
3571 _OP_REQP = ["instance_name"]
3574 def ExpandNames(self):
3575 self._ExpandAndLockInstance()
3576 self.needed_locks[locking.LEVEL_NODE] = []
3577 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3579 def DeclareLocks(self, level):
3580 if level == locking.LEVEL_NODE:
3581 self._LockInstancesNodes()
3583 def CheckPrereq(self):
3584 """Check prerequisites.
3586 This checks that the instance is in the cluster.
3589 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3590 assert self.instance is not None, \
3591 "Cannot retrieve locked instance %s" % self.op.instance_name
3592 _CheckNodeOnline(self, self.instance.primary_node)
3593 if not hasattr(self.op, "ignore_size"):
3594 self.op.ignore_size = False
3596 def Exec(self, feedback_fn):
3597 """Activate the disks.
3600 disks_ok, disks_info = \
3601 _AssembleInstanceDisks(self, self.instance,
3602 ignore_size=self.op.ignore_size)
3604 raise errors.OpExecError("Cannot activate block devices")
3609 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3611 """Prepare the block devices for an instance.
3613 This sets up the block devices on all nodes.
3615 @type lu: L{LogicalUnit}
3616 @param lu: the logical unit on whose behalf we execute
3617 @type instance: L{objects.Instance}
3618 @param instance: the instance for whose disks we assemble
3619 @type ignore_secondaries: boolean
3620 @param ignore_secondaries: if true, errors on secondary nodes
3621 won't result in an error return from the function
3622 @type ignore_size: boolean
3623 @param ignore_size: if true, the current known size of the disk
3624 will not be used during the disk activation, useful for cases
3625 when the size is wrong
3626 @return: False if the operation failed, otherwise a list of
3627 (host, instance_visible_name, node_visible_name)
3628 with the mapping from node devices to instance devices
3633 iname = instance.name
3634 # With the two-pass mechanism we try to reduce the window of
3635 # opportunity for the race condition of switching DRBD to primary
3636 # before the handshake has occurred, but we do not eliminate it
3638 # The proper fix would be to wait (with some limits) until the
3639 # connection has been made and drbd transitions from WFConnection
3640 # into any other network-connected state (Connected, SyncTarget,
3643 # 1st pass, assemble on all nodes in secondary mode
3644 for inst_disk in instance.disks:
3645 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3647 node_disk = node_disk.Copy()
3648 node_disk.UnsetSize()
3649 lu.cfg.SetDiskID(node_disk, node)
3650 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3651 msg = result.fail_msg
3653 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3654 " (is_primary=False, pass=1): %s",
3655 inst_disk.iv_name, node, msg)
3656 if not ignore_secondaries:
3659 # FIXME: race condition on drbd migration to primary
3661 # 2nd pass, do only the primary node
3662 for inst_disk in instance.disks:
3665 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3666 if node != instance.primary_node:
3669 node_disk = node_disk.Copy()
3670 node_disk.UnsetSize()
3671 lu.cfg.SetDiskID(node_disk, node)
3672 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3673 msg = result.fail_msg
3675 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3676 " (is_primary=True, pass=2): %s",
3677 inst_disk.iv_name, node, msg)
3680 dev_path = result.payload
3682 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3684 # leave the disks configured for the primary node
3685 # this is a workaround that would be fixed better by
3686 # improving the logical/physical id handling
3687 for disk in instance.disks:
3688 lu.cfg.SetDiskID(disk, instance.primary_node)
3690 return disks_ok, device_info
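
# Hedged sketch of the two-pass ordering implemented by
# _AssembleInstanceDisks, with the RPC replaced by a hypothetical
# assemble_fn(node, disk, as_primary) callback: every node assembles
# in secondary mode first, and only then does the primary node switch
# its devices to primary.
def _ExampleTwoPassAssemble(instance_nodes, primary_node, disks,
                            assemble_fn):
  for disk in disks:
    for node in instance_nodes:
      assemble_fn(node, disk, False)       # 1st pass: secondary everywhere
  for disk in disks:
    assemble_fn(primary_node, disk, True)  # 2nd pass: primary node only
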
3693 def _StartInstanceDisks(lu, instance, force):
3694 """Start the disks of an instance.
3697 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3698 ignore_secondaries=force)
3700 _ShutdownInstanceDisks(lu, instance)
3701 if force is not None and not force:
3702 lu.proc.LogWarning("", hint="If the message above refers to a"
3704 " you can retry the operation using '--force'.")
3705 raise errors.OpExecError("Disk consistency error")
3708 class LUDeactivateInstanceDisks(NoHooksLU):
3709 """Shutdown an instance's disks.
3712 _OP_REQP = ["instance_name"]
3715 def ExpandNames(self):
3716 self._ExpandAndLockInstance()
3717 self.needed_locks[locking.LEVEL_NODE] = []
3718 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3720 def DeclareLocks(self, level):
3721 if level == locking.LEVEL_NODE:
3722 self._LockInstancesNodes()
3724 def CheckPrereq(self):
3725 """Check prerequisites.
3727 This checks that the instance is in the cluster.
3730 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3731 assert self.instance is not None, \
3732 "Cannot retrieve locked instance %s" % self.op.instance_name
3734 def Exec(self, feedback_fn):
3735 """Deactivate the disks
3738 instance = self.instance
3739 _SafeShutdownInstanceDisks(self, instance)
3742 def _SafeShutdownInstanceDisks(lu, instance):
3743 """Shutdown block devices of an instance.
3745 This function checks that an instance is not running before calling
3746 _ShutdownInstanceDisks.
3749 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3750 _ShutdownInstanceDisks(lu, instance)
3753 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3754 """Shutdown block devices of an instance.
3756 This does the shutdown on all nodes of the instance.
3758 If ignore_primary is false, errors on the primary node are treated
3759 as fatal and cause the shutdown to be reported as failed.
3763 for disk in instance.disks:
3764 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3765 lu.cfg.SetDiskID(top_disk, node)
3766 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3767 msg = result.fail_msg
3769 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3770 disk.iv_name, node, msg)
3771 if not ignore_primary or node != instance.primary_node:
3776 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3777 """Checks if a node has enough free memory.
3779 This function checks if a given node has the needed amount of free
3780 memory. In case the node has less memory or we cannot get the
3781 information from the node, this function raises an OpPrereqError
3782 exception.
3784 @type lu: C{LogicalUnit}
3785 @param lu: a logical unit from which we get configuration data
3787 @param node: the node to check
3788 @type reason: C{str}
3789 @param reason: string to use in the error message
3790 @type requested: C{int}
3791 @param requested: the amount of memory in MiB to check for
3792 @type hypervisor_name: C{str}
3793 @param hypervisor_name: the hypervisor to ask for memory stats
3794 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3795 we cannot check the node
3798 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3799 nodeinfo[node].Raise("Can't get data from node %s" % node,
3800 prereq=True, ecode=errors.ECODE_ENVIRON)
3801 free_mem = nodeinfo[node].payload.get('memory_free', None)
3802 if not isinstance(free_mem, int):
3803 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3804 " was '%s'" % (node, free_mem),
3805 errors.ECODE_ENVIRON)
3806 if requested > free_mem:
3807 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3808 " needed %s MiB, available %s MiB" %
3809 (node, reason, requested, free_mem),
3813 def _CheckNodesFreeDisk(lu, nodenames, requested):
3814 """Checks if nodes have enough free disk space in the default VG.
3816 This function checks if all given nodes have the needed amount of
3817 free disk. In case any node has less disk or we cannot get the
3818 information from the node, this function raises an OpPrereqError
3819 exception.
3821 @type lu: C{LogicalUnit}
3822 @param lu: a logical unit from which we get configuration data
3823 @type nodenames: C{list}
3824 @param nodenames: the list of node names to check
3825 @type requested: C{int}
3826 @param requested: the amount of disk in MiB to check for
3827 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
3828 we cannot check the node
3831 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3832 lu.cfg.GetHypervisorType())
3833 for node in nodenames:
3834 info = nodeinfo[node]
3835 info.Raise("Cannot get current information from node %s" % node,
3836 prereq=True, ecode=errors.ECODE_ENVIRON)
3837 vg_free = info.payload.get("vg_free", None)
3838 if not isinstance(vg_free, int):
3839 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
3840 " result was '%s'" % (node, vg_free),
3841 errors.ECODE_ENVIRON)
3842 if requested > vg_free:
3843 raise errors.OpPrereqError("Not enough disk space on target node %s:"
3844 " required %d MiB, available %d MiB" %
3845 (node, requested, vg_free),
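
# Hedged sketch of the defensive payload handling shared by the
# free-memory and free-disk checks above: a missing or non-integer
# value means "we cannot check" and must raise, instead of being
# treated as zero.  The error codes mirror the visible checks; the
# real code may use a more specific code for the "not enough" case.
def _ExampleCheckResource(payload, key, requested, what):
  value = payload.get(key, None)
  if not isinstance(value, int):
    raise errors.OpPrereqError("Can't compute %s, result was '%s'" %
                               (what, value), errors.ECODE_ENVIRON)
  if requested > value:
    raise errors.OpPrereqError("Not enough %s: needed %s MiB,"
                               " available %s MiB" %
                               (what, requested, value),
                               errors.ECODE_ENVIRON)
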
3849 class LUStartupInstance(LogicalUnit):
3850 """Starts an instance.
3853 HPATH = "instance-start"
3854 HTYPE = constants.HTYPE_INSTANCE
3855 _OP_REQP = ["instance_name", "force"]
3858 def ExpandNames(self):
3859 self._ExpandAndLockInstance()
3861 def BuildHooksEnv(self):
3864 This runs on master, primary and secondary nodes of the instance.
3868 "FORCE": self.op.force,
3870 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3871 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3874 def CheckPrereq(self):
3875 """Check prerequisites.
3877 This checks that the instance is in the cluster.
3880 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3881 assert self.instance is not None, \
3882 "Cannot retrieve locked instance %s" % self.op.instance_name
3885     self.beparams = getattr(self.op, "beparams", {})
3886     if self.beparams:
3887       if not isinstance(self.beparams, dict):
3888         raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3889                                    " dict" % (type(self.beparams), ),
3890                                    errors.ECODE_INVAL)
3891 # fill the beparams dict
3892 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3893 self.op.beparams = self.beparams
3896     self.hvparams = getattr(self.op, "hvparams", {})
3897     if self.hvparams:
3898       if not isinstance(self.hvparams, dict):
3899         raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3900                                    " dict" % (type(self.hvparams), ),
3901                                    errors.ECODE_INVAL)
3903 # check hypervisor parameter syntax (locally)
3904 cluster = self.cfg.GetClusterInfo()
3905 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3906     filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3907                                   instance.hvparams)
3908 filled_hvp.update(self.hvparams)
3909 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3910 hv_type.CheckParameterSyntax(filled_hvp)
3911 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3912 self.op.hvparams = self.hvparams
3914 _CheckNodeOnline(self, instance.primary_node)
3916 bep = self.cfg.GetClusterInfo().FillBE(instance)
3917 # check bridges existence
3918 _CheckInstanceBridgesExist(self, instance)
3920     remote_info = self.rpc.call_instance_info(instance.primary_node,
3921                                               instance.name,
3922 instance.hypervisor)
3923 remote_info.Raise("Error checking node %s" % instance.primary_node,
3924 prereq=True, ecode=errors.ECODE_ENVIRON)
3925 if not remote_info.payload: # not running already
3926 _CheckNodeFreeMemory(self, instance.primary_node,
3927 "starting instance %s" % instance.name,
3928 bep[constants.BE_MEMORY], instance.hypervisor)
3930 def Exec(self, feedback_fn):
3931 """Start the instance.
3934 instance = self.instance
3935 force = self.op.force
3937 self.cfg.MarkInstanceUp(instance.name)
3939 node_current = instance.primary_node
3941 _StartInstanceDisks(self, instance, force)
3943     result = self.rpc.call_instance_start(node_current, instance,
3944                                           self.hvparams, self.beparams)
3945     msg = result.fail_msg
3946     if msg:
3947       _ShutdownInstanceDisks(self, instance)
3948       raise errors.OpExecError("Could not start instance: %s" % msg)
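# Submission sketch (assuming the opcode names of this codebase's era): a
# client reaches this LU by submitting something like
#
#   op = opcodes.OpStartupInstance(instance_name="inst1.example.com",
#                                  force=False)
#
# optionally carrying one-off "hvparams"/"beparams" overrides, which
# CheckPrereq above validates before they reach the hypervisor.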
3951 class LURebootInstance(LogicalUnit):
3952 """Reboot an instance.
3955 HPATH = "instance-reboot"
3956 HTYPE = constants.HTYPE_INSTANCE
3957 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3960 def CheckArguments(self):
3961 """Check the arguments.
3964 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3965 constants.DEFAULT_SHUTDOWN_TIMEOUT)
3967 def ExpandNames(self):
3968 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3969 constants.INSTANCE_REBOOT_HARD,
3970 constants.INSTANCE_REBOOT_FULL]:
3971 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3972 (constants.INSTANCE_REBOOT_SOFT,
3973 constants.INSTANCE_REBOOT_HARD,
3974 constants.INSTANCE_REBOOT_FULL))
3975 self._ExpandAndLockInstance()
3977 def BuildHooksEnv(self):
3980 This runs on master, primary and secondary nodes of the instance.
3984 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3985 "REBOOT_TYPE": self.op.reboot_type,
3986 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3988 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3989 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3992 def CheckPrereq(self):
3993 """Check prerequisites.
3995 This checks that the instance is in the cluster.
3998 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3999 assert self.instance is not None, \
4000 "Cannot retrieve locked instance %s" % self.op.instance_name
4002 _CheckNodeOnline(self, instance.primary_node)
4004 # check bridges existence
4005 _CheckInstanceBridgesExist(self, instance)
4007 def Exec(self, feedback_fn):
4008 """Reboot the instance.
4011 instance = self.instance
4012 ignore_secondaries = self.op.ignore_secondaries
4013 reboot_type = self.op.reboot_type
4015 node_current = instance.primary_node
4017 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4018 constants.INSTANCE_REBOOT_HARD]:
4019 for disk in instance.disks:
4020 self.cfg.SetDiskID(disk, node_current)
4021       result = self.rpc.call_instance_reboot(node_current, instance,
4022                                              reboot_type,
4023                                              self.shutdown_timeout)
4024       result.Raise("Could not reboot instance")
4025     else:
4026       result = self.rpc.call_instance_shutdown(node_current, instance,
4027                                                self.shutdown_timeout)
4028       result.Raise("Could not shutdown instance for full reboot")
4029 _ShutdownInstanceDisks(self, instance)
4030 _StartInstanceDisks(self, instance, ignore_secondaries)
4031 result = self.rpc.call_instance_start(node_current, instance, None, None)
4032       msg = result.fail_msg
4033       if msg:
4034         _ShutdownInstanceDisks(self, instance)
4035         raise errors.OpExecError("Could not start instance for"
4036                                  " full reboot: %s" % msg)
4038 self.cfg.MarkInstanceUp(instance.name)
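# Note on the reboot types handled above: SOFT and HARD are both delegated
# to a single instance_reboot RPC on the primary node, while FULL is
# emulated master-side as a shutdown, disk deactivation/reactivation and a
# fresh start.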
4041 class LUShutdownInstance(LogicalUnit):
4042 """Shutdown an instance.
4045 HPATH = "instance-stop"
4046 HTYPE = constants.HTYPE_INSTANCE
4047 _OP_REQP = ["instance_name"]
4050 def CheckArguments(self):
4051 """Check the arguments.
4054 self.timeout = getattr(self.op, "timeout",
4055 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4057 def ExpandNames(self):
4058 self._ExpandAndLockInstance()
4060 def BuildHooksEnv(self):
4063 This runs on master, primary and secondary nodes of the instance.
4066 env = _BuildInstanceHookEnvByObject(self, self.instance)
4067 env["TIMEOUT"] = self.timeout
4068     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4069     return env, nl, nl
4071 def CheckPrereq(self):
4072 """Check prerequisites.
4074 This checks that the instance is in the cluster.
4077 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4078 assert self.instance is not None, \
4079 "Cannot retrieve locked instance %s" % self.op.instance_name
4080 _CheckNodeOnline(self, self.instance.primary_node)
4082 def Exec(self, feedback_fn):
4083 """Shutdown the instance.
4086 instance = self.instance
4087 node_current = instance.primary_node
4088 timeout = self.timeout
4089 self.cfg.MarkInstanceDown(instance.name)
4090 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4091     msg = result.fail_msg
4092     if msg:
4093       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4095 _ShutdownInstanceDisks(self, instance)
4098 class LUReinstallInstance(LogicalUnit):
4099 """Reinstall an instance.
4102 HPATH = "instance-reinstall"
4103 HTYPE = constants.HTYPE_INSTANCE
4104 _OP_REQP = ["instance_name"]
4107 def ExpandNames(self):
4108 self._ExpandAndLockInstance()
4110 def BuildHooksEnv(self):
4113 This runs on master, primary and secondary nodes of the instance.
4116 env = _BuildInstanceHookEnvByObject(self, self.instance)
4117     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4118     return env, nl, nl
4120 def CheckPrereq(self):
4121 """Check prerequisites.
4123 This checks that the instance is in the cluster and is not running.
4126 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4127 assert instance is not None, \
4128 "Cannot retrieve locked instance %s" % self.op.instance_name
4129 _CheckNodeOnline(self, instance.primary_node)
4131 if instance.disk_template == constants.DT_DISKLESS:
4132 raise errors.OpPrereqError("Instance '%s' has no disks" %
4133                                  self.op.instance_name,
4134                                  errors.ECODE_INVAL)
4135 _CheckInstanceDown(self, instance, "cannot reinstall")
4137 self.op.os_type = getattr(self.op, "os_type", None)
4138 self.op.force_variant = getattr(self.op, "force_variant", False)
4139 if self.op.os_type is not None:
4141 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4142 result = self.rpc.call_os_get(pnode, self.op.os_type)
4143 result.Raise("OS '%s' not in supported OS list for primary node %s" %
4144 (self.op.os_type, pnode),
4145 prereq=True, ecode=errors.ECODE_INVAL)
4146 if not self.op.force_variant:
4147 _CheckOSVariant(result.payload, self.op.os_type)
4149 self.instance = instance
4151 def Exec(self, feedback_fn):
4152 """Reinstall the instance.
4155 inst = self.instance
4157 if self.op.os_type is not None:
4158 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4159 inst.os = self.op.os_type
4160 self.cfg.Update(inst, feedback_fn)
4162 _StartInstanceDisks(self, inst, None)
4164 feedback_fn("Running the instance OS create scripts...")
4165 # FIXME: pass debug option from opcode to backend
4166 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4167 self.op.debug_level)
4168 result.Raise("Could not install OS for instance %s on node %s" %
4169 (inst.name, inst.primary_node))
4170     finally:
4171       _ShutdownInstanceDisks(self, inst)
4174 class LURecreateInstanceDisks(LogicalUnit):
4175 """Recreate an instance's missing disks.
4178 HPATH = "instance-recreate-disks"
4179 HTYPE = constants.HTYPE_INSTANCE
4180 _OP_REQP = ["instance_name", "disks"]
4183 def CheckArguments(self):
4184 """Check the arguments.
4187 if not isinstance(self.op.disks, list):
4188 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4189 for item in self.op.disks:
4190       if (not isinstance(item, int) or
4191           item < 0):
4192         raise errors.OpPrereqError("Invalid disk specification '%s'" %
4193                                    str(item), errors.ECODE_INVAL)
4195 def ExpandNames(self):
4196 self._ExpandAndLockInstance()
4198 def BuildHooksEnv(self):
4201 This runs on master, primary and secondary nodes of the instance.
4204 env = _BuildInstanceHookEnvByObject(self, self.instance)
4205     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4206     return env, nl, nl
4208 def CheckPrereq(self):
4209 """Check prerequisites.
4211 This checks that the instance is in the cluster and is not running.
4214 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4215 assert instance is not None, \
4216 "Cannot retrieve locked instance %s" % self.op.instance_name
4217 _CheckNodeOnline(self, instance.primary_node)
4219 if instance.disk_template == constants.DT_DISKLESS:
4220 raise errors.OpPrereqError("Instance '%s' has no disks" %
4221 self.op.instance_name, errors.ECODE_INVAL)
4222 _CheckInstanceDown(self, instance, "cannot recreate disks")
4224 if not self.op.disks:
4225 self.op.disks = range(len(instance.disks))
4227 for idx in self.op.disks:
4228 if idx >= len(instance.disks):
4229 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4232 self.instance = instance
4234 def Exec(self, feedback_fn):
4235 """Recreate the disks.
4239 for idx, _ in enumerate(self.instance.disks):
4240 if idx not in self.op.disks: # disk idx has not been passed in
4244 _CreateDisks(self, self.instance, to_skip=to_skip)
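# Example: on a three-disk instance, an opcode with disks=[1] yields
# to_skip=[0, 2], so only disk 1 is re-created; an empty disks list is
# expanded by CheckPrereq above to cover all disks, giving an empty to_skip.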
4247 class LURenameInstance(LogicalUnit):
4248 """Rename an instance.
4251 HPATH = "instance-rename"
4252 HTYPE = constants.HTYPE_INSTANCE
4253 _OP_REQP = ["instance_name", "new_name"]
4255 def BuildHooksEnv(self):
4258 This runs on master, primary and secondary nodes of the instance.
4261 env = _BuildInstanceHookEnvByObject(self, self.instance)
4262 env["INSTANCE_NEW_NAME"] = self.op.new_name
4263     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4264     return env, nl, nl
4266 def CheckPrereq(self):
4267 """Check prerequisites.
4269 This checks that the instance is in the cluster and is not running.
4272 self.op.instance_name = _ExpandInstanceName(self.cfg,
4273 self.op.instance_name)
4274 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4275 assert instance is not None
4276 _CheckNodeOnline(self, instance.primary_node)
4277 _CheckInstanceDown(self, instance, "cannot rename")
4278 self.instance = instance
4280 # new name verification
4281 name_info = utils.GetHostInfo(self.op.new_name)
4283 self.op.new_name = new_name = name_info.name
4284 instance_list = self.cfg.GetInstanceList()
4285 if new_name in instance_list:
4286 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4287 new_name, errors.ECODE_EXISTS)
4289 if not getattr(self.op, "ignore_ip", False):
4290 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4291 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4292 (name_info.ip, new_name),
4293 errors.ECODE_NOTUNIQUE)
4296 def Exec(self, feedback_fn):
4297 """Reinstall the instance.
4300 inst = self.instance
4301 old_name = inst.name
4303 if inst.disk_template == constants.DT_FILE:
4304 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4306 self.cfg.RenameInstance(inst.name, self.op.new_name)
4307 # Change the instance lock. This is definitely safe while we hold the BGL
4308 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4309 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4311 # re-read the instance from the configuration after rename
4312 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4314 if inst.disk_template == constants.DT_FILE:
4315 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4316 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4317 old_file_storage_dir,
4318 new_file_storage_dir)
4319 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4320 " (but the instance has been renamed in Ganeti)" %
4321 (inst.primary_node, old_file_storage_dir,
4322 new_file_storage_dir))
4324 _StartInstanceDisks(self, inst, None)
4325     try:
4326       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4327 old_name, self.op.debug_level)
4328       msg = result.fail_msg
4329       if msg:
4330         msg = ("Could not run OS rename script for instance %s on node %s"
4331                " (but the instance has been renamed in Ganeti): %s" %
4332                (inst.name, inst.primary_node, msg))
4333         self.proc.LogWarning(msg)
4334     finally:
4335       _ShutdownInstanceDisks(self, inst)
4338 class LURemoveInstance(LogicalUnit):
4339 """Remove an instance.
4342 HPATH = "instance-remove"
4343 HTYPE = constants.HTYPE_INSTANCE
4344 _OP_REQP = ["instance_name", "ignore_failures"]
4347 def CheckArguments(self):
4348 """Check the arguments.
4351 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4352 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4354 def ExpandNames(self):
4355 self._ExpandAndLockInstance()
4356 self.needed_locks[locking.LEVEL_NODE] = []
4357 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4359 def DeclareLocks(self, level):
4360 if level == locking.LEVEL_NODE:
4361 self._LockInstancesNodes()
4363 def BuildHooksEnv(self):
4366 This runs on master, primary and secondary nodes of the instance.
4369 env = _BuildInstanceHookEnvByObject(self, self.instance)
4370 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4371 nl = [self.cfg.GetMasterNode()]
4372 nl_post = list(self.instance.all_nodes) + nl
4373 return env, nl, nl_post
4375 def CheckPrereq(self):
4376 """Check prerequisites.
4378 This checks that the instance is in the cluster.
4381 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4382 assert self.instance is not None, \
4383 "Cannot retrieve locked instance %s" % self.op.instance_name
4385 def Exec(self, feedback_fn):
4386 """Remove the instance.
4389 instance = self.instance
4390 logging.info("Shutting down instance %s on node %s",
4391 instance.name, instance.primary_node)
4393 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4394 self.shutdown_timeout)
4395 msg = result.fail_msg
4397 if self.op.ignore_failures:
4398 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4400 raise errors.OpExecError("Could not shutdown instance %s on"
4402 (instance.name, instance.primary_node, msg))
4404 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4407 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4408 """Utility function to remove an instance.
4411 logging.info("Removing block devices for instance %s", instance.name)
4413 if not _RemoveDisks(lu, instance):
4414 if not ignore_failures:
4415 raise errors.OpExecError("Can't remove instance's disks")
4416 feedback_fn("Warning: can't remove instance's disks")
4418 logging.info("Removing instance %s out of cluster config", instance.name)
4420 lu.cfg.RemoveInstance(instance.name)
4422 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4423 "Instance lock removal conflict"
4425 # Remove lock for the instance
4426 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4429 class LUQueryInstances(NoHooksLU):
4430 """Logical unit for querying instances.
4433 # pylint: disable-msg=W0142
4434 _OP_REQP = ["output_fields", "names", "use_locking"]
4436 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4437 "serial_no", "ctime", "mtime", "uuid"]
4438   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4439                                     "admin_state",
4440 "disk_template", "ip", "mac", "bridge",
4441 "nic_mode", "nic_link",
4442 "sda_size", "sdb_size", "vcpus", "tags",
4443 "network_port", "beparams",
4444 r"(disk)\.(size)/([0-9]+)",
4445 r"(disk)\.(sizes)", "disk_usage",
4446 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4447 r"(nic)\.(bridge)/([0-9]+)",
4448 r"(nic)\.(macs|ips|modes|links|bridges)",
4449 r"(disk|nic)\.(count)",
4450                                     "hvparams",
4451                                     ] + _SIMPLE_FIELDS +
4452                                   ["hv/%s" % name
4453                                    for name in constants.HVS_PARAMETERS
4454                                    if name not in constants.HVC_GLOBALS] +
4455                                   ["be/%s" % name
4456                                    for name in constants.BES_PARAMETERS])
4457 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4460 def ExpandNames(self):
4461 _CheckOutputFields(static=self._FIELDS_STATIC,
4462 dynamic=self._FIELDS_DYNAMIC,
4463 selected=self.op.output_fields)
4465 self.needed_locks = {}
4466 self.share_locks[locking.LEVEL_INSTANCE] = 1
4467 self.share_locks[locking.LEVEL_NODE] = 1
4469     if self.op.names:
4470       self.wanted = _GetWantedInstances(self, self.op.names)
4471     else:
4472       self.wanted = locking.ALL_SET
4474 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4475 self.do_locking = self.do_node_query and self.op.use_locking
4476     if self.do_locking:
4477       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4478 self.needed_locks[locking.LEVEL_NODE] = []
4479 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4481 def DeclareLocks(self, level):
4482 if level == locking.LEVEL_NODE and self.do_locking:
4483 self._LockInstancesNodes()
4485 def CheckPrereq(self):
4486 """Check prerequisites.
4491 def Exec(self, feedback_fn):
4492 """Computes the list of nodes and their attributes.
4495 # pylint: disable-msg=R0912
4496 # way too many branches here
4497 all_info = self.cfg.GetAllInstancesInfo()
4498 if self.wanted == locking.ALL_SET:
4499 # caller didn't specify instance names, so ordering is not important
4500       if self.do_locking:
4501         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4502       else:
4503         instance_names = all_info.keys()
4504 instance_names = utils.NiceSort(instance_names)
4505     else:
4506       # caller did specify names, so we must keep the ordering
4507       if self.do_locking:
4508         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4509       else:
4510         tgt_set = all_info.keys()
4511       missing = set(self.wanted).difference(tgt_set)
4512       if missing:
4513         raise errors.OpExecError("Some instances were removed before"
4514 " retrieving their data: %s" % missing)
4515 instance_names = self.wanted
4517 instance_list = [all_info[iname] for iname in instance_names]
4519 # begin data gathering
4521 nodes = frozenset([inst.primary_node for inst in instance_list])
4522 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4524     bad_nodes = []
4525     off_nodes = []
4526     if self.do_node_query:
4527       live_data = {}
4528       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4529       for name in nodes:
4530         result = node_data[name]
4531         if result.offline:
4532           # offline nodes will be in both lists
4533           off_nodes.append(name)
4534         if result.fail_msg:
4535           bad_nodes.append(name)
4536         else:
4537           if result.payload:
4538             live_data.update(result.payload)
4539           # else no instance is alive
4540     else:
4541       live_data = dict([(name, {}) for name in instance_names])
4543 # end data gathering
4545     HVPREFIX = "hv/"
4546     BEPREFIX = "be/"
4547     output = []
4548     cluster = self.cfg.GetClusterInfo()
4549 for instance in instance_list:
4550       iout = []
4551       i_hv = cluster.FillHV(instance, skip_globals=True)
4552 i_be = cluster.FillBE(instance)
4553 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4554 nic.nicparams) for nic in instance.nics]
4555 for field in self.op.output_fields:
4556 st_match = self._FIELDS_STATIC.Matches(field)
4557 if field in self._SIMPLE_FIELDS:
4558 val = getattr(instance, field)
4559 elif field == "pnode":
4560 val = instance.primary_node
4561 elif field == "snodes":
4562 val = list(instance.secondary_nodes)
4563 elif field == "admin_state":
4564 val = instance.admin_up
4565 elif field == "oper_state":
4566           if instance.primary_node in bad_nodes:
4567             val = None
4568           else:
4569             val = bool(live_data.get(instance.name))
4570         elif field == "status":
4571           if instance.primary_node in off_nodes:
4572             val = "ERROR_nodeoffline"
4573           elif instance.primary_node in bad_nodes:
4574             val = "ERROR_nodedown"
4575           else:
4576             running = bool(live_data.get(instance.name))
4577             if running:
4578               if instance.admin_up:
4579                 val = "running"
4580               else:
4581                 val = "ERROR_up"
4582             else:
4583               if instance.admin_up:
4584                 val = "ERROR_down"
4585               else:
4586                 val = "ADMIN_down"
4587         elif field == "oper_ram":
4588           if instance.primary_node in bad_nodes:
4589             val = None
4590           elif instance.name in live_data:
4591             val = live_data[instance.name].get("memory", "?")
4592           else:
4593             val = "-"
4594 elif field == "vcpus":
4595 val = i_be[constants.BE_VCPUS]
4596 elif field == "disk_template":
4597 val = instance.disk_template
4598         elif field == "ip":
4599           if instance.nics:
4600             val = instance.nics[0].ip
4601           else:
4602             val = None
4603 elif field == "nic_mode":
4604           if instance.nics:
4605             val = i_nicp[0][constants.NIC_MODE]
4606           else:
4607             val = None
4608 elif field == "nic_link":
4609           if instance.nics:
4610             val = i_nicp[0][constants.NIC_LINK]
4611           else:
4612             val = None
4613 elif field == "bridge":
4614 if (instance.nics and
4615 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4616             val = i_nicp[0][constants.NIC_LINK]
4617           else:
4618             val = None
4619 elif field == "mac":
4620           if instance.nics:
4621             val = instance.nics[0].mac
4622           else:
4623             val = None
4624 elif field == "sda_size" or field == "sdb_size":
4625           idx = ord(field[2]) - ord('a')
4626           try:
4627             val = instance.FindDisk(idx).size
4628           except errors.OpPrereqError:
4629             val = None
4630 elif field == "disk_usage": # total disk usage per node
4631 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4632 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4633 elif field == "tags":
4634 val = list(instance.GetTags())
4635 elif field == "hvparams":
4637 elif (field.startswith(HVPREFIX) and
4638 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4639 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4640 val = i_hv.get(field[len(HVPREFIX):], None)
4641 elif field == "beparams":
4643 elif (field.startswith(BEPREFIX) and
4644 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4645 val = i_be.get(field[len(BEPREFIX):], None)
4646 elif st_match and st_match.groups():
4647 # matches a variable list
4648 st_groups = st_match.groups()
4649 if st_groups and st_groups[0] == "disk":
4650 if st_groups[1] == "count":
4651 val = len(instance.disks)
4652 elif st_groups[1] == "sizes":
4653 val = [disk.size for disk in instance.disks]
4654 elif st_groups[1] == "size":
4655             try:
4656               val = instance.FindDisk(st_groups[2]).size
4657             except errors.OpPrereqError:
4658               val = None
4659           else:
4660             assert False, "Unhandled disk parameter"
4661 elif st_groups[0] == "nic":
4662 if st_groups[1] == "count":
4663 val = len(instance.nics)
4664 elif st_groups[1] == "macs":
4665 val = [nic.mac for nic in instance.nics]
4666 elif st_groups[1] == "ips":
4667 val = [nic.ip for nic in instance.nics]
4668 elif st_groups[1] == "modes":
4669 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4670 elif st_groups[1] == "links":
4671 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4672 elif st_groups[1] == "bridges":
4673               val = []
4674               for nicp in i_nicp:
4675                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4676                   val.append(nicp[constants.NIC_LINK])
4677                 else:
4678                   val.append(None)
4679             else:
4680               # index-based items
4681               nic_idx = int(st_groups[2])
4682               if nic_idx >= len(instance.nics):
4683                 val = None
4684               else:
4685 if st_groups[1] == "mac":
4686 val = instance.nics[nic_idx].mac
4687 elif st_groups[1] == "ip":
4688 val = instance.nics[nic_idx].ip
4689 elif st_groups[1] == "mode":
4690 val = i_nicp[nic_idx][constants.NIC_MODE]
4691 elif st_groups[1] == "link":
4692 val = i_nicp[nic_idx][constants.NIC_LINK]
4693 elif st_groups[1] == "bridge":
4694 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4695 if nic_mode == constants.NIC_MODE_BRIDGED:
4696                     val = i_nicp[nic_idx][constants.NIC_LINK]
4697                   else:
4698                     val = None
4699                 else:
4700                   assert False, "Unhandled NIC parameter"
4701           else:
4702             assert False, ("Declared but unhandled variable parameter '%s'" %
4703                            field)
4704         else:
4705           assert False, "Declared but unhandled parameter '%s'" % field
4706         iout.append(val)
4707       output.append(iout)
4709     return output
4712 class LUFailoverInstance(LogicalUnit):
4713 """Failover an instance.
4716 HPATH = "instance-failover"
4717 HTYPE = constants.HTYPE_INSTANCE
4718 _OP_REQP = ["instance_name", "ignore_consistency"]
4721 def CheckArguments(self):
4722 """Check the arguments.
4725 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4726 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4728 def ExpandNames(self):
4729 self._ExpandAndLockInstance()
4730 self.needed_locks[locking.LEVEL_NODE] = []
4731 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4733 def DeclareLocks(self, level):
4734 if level == locking.LEVEL_NODE:
4735 self._LockInstancesNodes()
4737 def BuildHooksEnv(self):
4740 This runs on master, primary and secondary nodes of the instance.
4743 instance = self.instance
4744 source_node = instance.primary_node
4745 target_node = instance.secondary_nodes[0]
4747 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4748 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4749 "OLD_PRIMARY": source_node,
4750 "OLD_SECONDARY": target_node,
4751 "NEW_PRIMARY": target_node,
4752 "NEW_SECONDARY": source_node,
4753       }
4754     env.update(_BuildInstanceHookEnvByObject(self, instance))
4755 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4756     nl_post = list(nl)
4757     nl_post.append(source_node)
4758 return env, nl, nl_post
4760 def CheckPrereq(self):
4761 """Check prerequisites.
4763 This checks that the instance is in the cluster.
4766 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4767 assert self.instance is not None, \
4768 "Cannot retrieve locked instance %s" % self.op.instance_name
4770 bep = self.cfg.GetClusterInfo().FillBE(instance)
4771 if instance.disk_template not in constants.DTS_NET_MIRROR:
4772 raise errors.OpPrereqError("Instance's disk layout is not"
4773 " network mirrored, cannot failover.",
4776 secondary_nodes = instance.secondary_nodes
4777 if not secondary_nodes:
4778 raise errors.ProgrammerError("no secondary node but using "
4779 "a mirrored disk template")
4781 target_node = secondary_nodes[0]
4782 _CheckNodeOnline(self, target_node)
4783 _CheckNodeNotDrained(self, target_node)
4784 if instance.admin_up:
4785 # check memory requirements on the secondary node
4786 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4787 instance.name, bep[constants.BE_MEMORY],
4788 instance.hypervisor)
4790 self.LogInfo("Not checking memory on the secondary node as"
4791 " instance will not be started")
4793     # check bridge existence
4794 _CheckInstanceBridgesExist(self, instance, node=target_node)
4796 def Exec(self, feedback_fn):
4797 """Failover an instance.
4799 The failover is done by shutting it down on its present node and
4800 starting it on the secondary.
4803 instance = self.instance
4805 source_node = instance.primary_node
4806 target_node = instance.secondary_nodes[0]
4808 if instance.admin_up:
4809 feedback_fn("* checking disk consistency between source and target")
4810 for dev in instance.disks:
4811 # for drbd, these are drbd over lvm
4812 if not _CheckDiskConsistency(self, dev, target_node, False):
4813 if not self.op.ignore_consistency:
4814 raise errors.OpExecError("Disk %s is degraded on target node,"
4815 " aborting failover." % dev.iv_name)
4817 feedback_fn("* not checking disk consistency as instance is not running")
4819 feedback_fn("* shutting down instance on source node")
4820 logging.info("Shutting down instance %s on node %s",
4821 instance.name, source_node)
4823 result = self.rpc.call_instance_shutdown(source_node, instance,
4824 self.shutdown_timeout)
4825 msg = result.fail_msg
4826     if msg:
4827       if self.op.ignore_consistency:
4828 self.proc.LogWarning("Could not shutdown instance %s on node %s."
4829 " Proceeding anyway. Please make sure node"
4830 " %s is down. Error details: %s",
4831 instance.name, source_node, source_node, msg)
4833 raise errors.OpExecError("Could not shutdown instance %s on"
4834                                  " node %s: %s" %
4835                                  (instance.name, source_node, msg))
4837 feedback_fn("* deactivating the instance's disks on source node")
4838 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4839 raise errors.OpExecError("Can't shut down the instance's disks.")
4841 instance.primary_node = target_node
4842 # distribute new instance config to the other nodes
4843 self.cfg.Update(instance, feedback_fn)
4845 # Only start the instance if it's marked as up
4846 if instance.admin_up:
4847 feedback_fn("* activating the instance's disks on target node")
4848 logging.info("Starting instance %s on node %s",
4849 instance.name, target_node)
4851 disks_ok, _ = _AssembleInstanceDisks(self, instance,
4852 ignore_secondaries=True)
4853       if not disks_ok:
4854         _ShutdownInstanceDisks(self, instance)
4855 raise errors.OpExecError("Can't activate the instance's disks")
4857 feedback_fn("* starting the instance on the target node")
4858 result = self.rpc.call_instance_start(target_node, instance, None, None)
4859 msg = result.fail_msg
4860       if msg:
4861         _ShutdownInstanceDisks(self, instance)
4862 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4863 (instance.name, target_node, msg))
4866 class LUMigrateInstance(LogicalUnit):
4867 """Migrate an instance.
4869 This is migration without shutting down, compared to the failover,
4870 which is done with shutdown.
4873 HPATH = "instance-migrate"
4874 HTYPE = constants.HTYPE_INSTANCE
4875 _OP_REQP = ["instance_name", "live", "cleanup"]
4879 def ExpandNames(self):
4880 self._ExpandAndLockInstance()
4882 self.needed_locks[locking.LEVEL_NODE] = []
4883 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4885 self._migrater = TLMigrateInstance(self, self.op.instance_name,
4886 self.op.live, self.op.cleanup)
4887 self.tasklets = [self._migrater]
4889 def DeclareLocks(self, level):
4890 if level == locking.LEVEL_NODE:
4891 self._LockInstancesNodes()
4893 def BuildHooksEnv(self):
4896 This runs on master, primary and secondary nodes of the instance.
4899 instance = self._migrater.instance
4900 source_node = instance.primary_node
4901 target_node = instance.secondary_nodes[0]
4902 env = _BuildInstanceHookEnvByObject(self, instance)
4903 env["MIGRATE_LIVE"] = self.op.live
4904 env["MIGRATE_CLEANUP"] = self.op.cleanup
4906 "OLD_PRIMARY": source_node,
4907 "OLD_SECONDARY": target_node,
4908 "NEW_PRIMARY": target_node,
4909 "NEW_SECONDARY": source_node,
4910       })
4911     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4912     nl_post = list(nl)
4913     nl_post.append(source_node)
4914 return env, nl, nl_post
4917 class LUMoveInstance(LogicalUnit):
4918 """Move an instance by data-copying.
4921 HPATH = "instance-move"
4922 HTYPE = constants.HTYPE_INSTANCE
4923 _OP_REQP = ["instance_name", "target_node"]
4926 def CheckArguments(self):
4927 """Check the arguments.
4930 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4931 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4933 def ExpandNames(self):
4934 self._ExpandAndLockInstance()
4935 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4936 self.op.target_node = target_node
4937 self.needed_locks[locking.LEVEL_NODE] = [target_node]
4938 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4940 def DeclareLocks(self, level):
4941 if level == locking.LEVEL_NODE:
4942 self._LockInstancesNodes(primary_only=True)
4944 def BuildHooksEnv(self):
4947 This runs on master, primary and secondary nodes of the instance.
4951 "TARGET_NODE": self.op.target_node,
4952 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4954 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4955 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4956                                        self.op.target_node]
4957     return env, nl, nl
4959 def CheckPrereq(self):
4960 """Check prerequisites.
4962 This checks that the instance is in the cluster.
4965 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4966 assert self.instance is not None, \
4967 "Cannot retrieve locked instance %s" % self.op.instance_name
4969 node = self.cfg.GetNodeInfo(self.op.target_node)
4970 assert node is not None, \
4971 "Cannot retrieve locked node %s" % self.op.target_node
4973 self.target_node = target_node = node.name
4975 if target_node == instance.primary_node:
4976 raise errors.OpPrereqError("Instance %s is already on the node %s" %
4977                                  (instance.name, target_node),
4978                                  errors.ECODE_STATE)
4980 bep = self.cfg.GetClusterInfo().FillBE(instance)
4982 for idx, dsk in enumerate(instance.disks):
4983 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4984 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4985 " cannot copy" % idx, errors.ECODE_STATE)
4987 _CheckNodeOnline(self, target_node)
4988 _CheckNodeNotDrained(self, target_node)
4990 if instance.admin_up:
4991 # check memory requirements on the secondary node
4992 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4993 instance.name, bep[constants.BE_MEMORY],
4994 instance.hypervisor)
4996 self.LogInfo("Not checking memory on the secondary node as"
4997 " instance will not be started")
4999     # check bridge existence
5000 _CheckInstanceBridgesExist(self, instance, node=target_node)
5002 def Exec(self, feedback_fn):
5003 """Move an instance.
5005 The move is done by shutting it down on its present node, copying
5006 the data over (slow) and starting it on the new node.
5009 instance = self.instance
5011 source_node = instance.primary_node
5012 target_node = self.target_node
5014 self.LogInfo("Shutting down instance %s on source node %s",
5015 instance.name, source_node)
5017 result = self.rpc.call_instance_shutdown(source_node, instance,
5018 self.shutdown_timeout)
5019 msg = result.fail_msg
5020     if msg:
5021       if self.op.ignore_consistency:
5022 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5023 " Proceeding anyway. Please make sure node"
5024 " %s is down. Error details: %s",
5025 instance.name, source_node, source_node, msg)
5027 raise errors.OpExecError("Could not shutdown instance %s on"
5029 (instance.name, source_node, msg))
5031 # create the target disks
5032     try:
5033       _CreateDisks(self, instance, target_node=target_node)
5034     except errors.OpExecError:
5035       self.LogWarning("Device creation failed, reverting...")
5036       try:
5037         _RemoveDisks(self, instance, target_node=target_node)
5038       finally:
5039         self.cfg.ReleaseDRBDMinors(instance.name)
5040         raise
5042 cluster_name = self.cfg.GetClusterInfo().cluster_name
5044     errs = []
5045     # activate, get path, copy the data over
5046 for idx, disk in enumerate(instance.disks):
5047 self.LogInfo("Copying data for disk %d", idx)
5048 result = self.rpc.call_blockdev_assemble(target_node, disk,
5049 instance.name, True)
5051 self.LogWarning("Can't assemble newly created disk %d: %s",
5052 idx, result.fail_msg)
5053 errs.append(result.fail_msg)
5055 dev_path = result.payload
5056 result = self.rpc.call_blockdev_export(source_node, disk,
5057 target_node, dev_path,
5060 self.LogWarning("Can't copy data over for disk %d: %s",
5061 idx, result.fail_msg)
5062 errs.append(result.fail_msg)
5066 self.LogWarning("Some disks failed to copy, aborting")
5068 _RemoveDisks(self, instance, target_node=target_node)
5070 self.cfg.ReleaseDRBDMinors(instance.name)
5071 raise errors.OpExecError("Errors during disk copy: %s" %
5074 instance.primary_node = target_node
5075 self.cfg.Update(instance, feedback_fn)
5077 self.LogInfo("Removing the disks on the original node")
5078 _RemoveDisks(self, instance, target_node=source_node)
5080 # Only start the instance if it's marked as up
5081 if instance.admin_up:
5082 self.LogInfo("Starting instance %s on node %s",
5083 instance.name, target_node)
5085 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5086 ignore_secondaries=True)
5087       if not disks_ok:
5088         _ShutdownInstanceDisks(self, instance)
5089 raise errors.OpExecError("Can't activate the instance's disks")
5091 result = self.rpc.call_instance_start(target_node, instance, None, None)
5092 msg = result.fail_msg
5093       if msg:
5094         _ShutdownInstanceDisks(self, instance)
5095 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5096 (instance.name, target_node, msg))
5099 class LUMigrateNode(LogicalUnit):
5100 """Migrate all instances from a node.
5103 HPATH = "node-migrate"
5104 HTYPE = constants.HTYPE_NODE
5105 _OP_REQP = ["node_name", "live"]
5108 def ExpandNames(self):
5109 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5111 self.needed_locks = {
5112       locking.LEVEL_NODE: [self.op.node_name],
5113       }
5115 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5117 # Create tasklets for migrating instances for all instances on this node
5118     names = []
5119     tasklets = []
5121     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5122 logging.debug("Migrating instance %s", inst.name)
5123 names.append(inst.name)
5125 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5127 self.tasklets = tasklets
5129 # Declare instance locks
5130 self.needed_locks[locking.LEVEL_INSTANCE] = names
5132 def DeclareLocks(self, level):
5133 if level == locking.LEVEL_NODE:
5134 self._LockInstancesNodes()
5136 def BuildHooksEnv(self):
5139 This runs on the master, the primary and all the secondaries.
5143 "NODE_NAME": self.op.node_name,
5146 nl = [self.cfg.GetMasterNode()]
5148 return (env, nl, nl)
5151 class TLMigrateInstance(Tasklet):
5152 def __init__(self, lu, instance_name, live, cleanup):
5153 """Initializes this class.
5156 Tasklet.__init__(self, lu)
5159 self.instance_name = instance_name
5160     self.live = live
5161     self.cleanup = cleanup
5163 def CheckPrereq(self):
5164 """Check prerequisites.
5166 This checks that the instance is in the cluster.
5169 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5170 instance = self.cfg.GetInstanceInfo(instance_name)
5171 assert instance is not None
5173 if instance.disk_template != constants.DT_DRBD8:
5174 raise errors.OpPrereqError("Instance's disk layout is not"
5175 " drbd8, cannot migrate.", errors.ECODE_STATE)
5177 secondary_nodes = instance.secondary_nodes
5178 if not secondary_nodes:
5179 raise errors.ConfigurationError("No secondary node but using"
5180 " drbd8 disk template")
5182 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5184 target_node = secondary_nodes[0]
5185 # check memory requirements on the secondary node
5186 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5187 instance.name, i_be[constants.BE_MEMORY],
5188 instance.hypervisor)
5190 # check bridge existance
5191 _CheckInstanceBridgesExist(self, instance, node=target_node)
5193 if not self.cleanup:
5194 _CheckNodeNotDrained(self, target_node)
5195       result = self.rpc.call_instance_migratable(instance.primary_node,
5196                                                  instance)
5197 result.Raise("Can't migrate, please use failover",
5198 prereq=True, ecode=errors.ECODE_STATE)
5200 self.instance = instance
5202 def _WaitUntilSync(self):
5203 """Poll with custom rpc for disk sync.
5205 This uses our own step-based rpc call.
5208 self.feedback_fn("* wait until resync is done")
5209     all_done = False
5210     while not all_done:
5211       all_done = True
5212       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5213                                             self.nodes_ip,
5214                                             self.instance.disks)
5215       min_percent = 100
5216 for node, nres in result.items():
5217 nres.Raise("Cannot resync disks on node %s" % node)
5218 node_done, node_percent = nres.payload
5219 all_done = all_done and node_done
5220 if node_percent is not None:
5221 min_percent = min(min_percent, node_percent)
5222       if not all_done:
5223         if min_percent < 100:
5224           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5225         time.sleep(2)
5227 def _EnsureSecondary(self, node):
5228 """Demote a node to secondary.
5231 self.feedback_fn("* switching node %s to secondary mode" % node)
5233 for dev in self.instance.disks:
5234 self.cfg.SetDiskID(dev, node)
5236 result = self.rpc.call_blockdev_close(node, self.instance.name,
5237 self.instance.disks)
5238 result.Raise("Cannot change disk to secondary on node %s" % node)
5240 def _GoStandalone(self):
5241 """Disconnect from the network.
5244 self.feedback_fn("* changing into standalone mode")
5245 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5246 self.instance.disks)
5247 for node, nres in result.items():
5248 nres.Raise("Cannot disconnect disks node %s" % node)
5250 def _GoReconnect(self, multimaster):
5251 """Reconnect to the network.
5257 msg = "single-master"
5258 self.feedback_fn("* changing disks into %s mode" % msg)
5259 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5260 self.instance.disks,
5261 self.instance.name, multimaster)
5262 for node, nres in result.items():
5263 nres.Raise("Cannot change disks config on node %s" % node)
5265 def _ExecCleanup(self):
5266 """Try to cleanup after a failed migration.
5268 The cleanup is done by:
5269 - check that the instance is running only on one node
5270 (and update the config if needed)
5271 - change disks on its secondary node to secondary
5272 - wait until disks are fully synchronized
5273 - disconnect from the network
5274 - change disks into single-master mode
5275 - wait again until disks are fully synchronized
5278 instance = self.instance
5279 target_node = self.target_node
5280 source_node = self.source_node
5282 # check running on only one node
5283 self.feedback_fn("* checking where the instance actually runs"
5284 " (if this hangs, the hypervisor might be in"
5286 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5287 for node, result in ins_l.items():
5288 result.Raise("Can't contact node %s" % node)
5290 runningon_source = instance.name in ins_l[source_node].payload
5291 runningon_target = instance.name in ins_l[target_node].payload
5293 if runningon_source and runningon_target:
5294 raise errors.OpExecError("Instance seems to be running on two nodes,"
5295 " or the hypervisor is confused. You will have"
5296 " to ensure manually that it runs only on one"
5297 " and restart this operation.")
5299 if not (runningon_source or runningon_target):
5300 raise errors.OpExecError("Instance does not seem to be running at all."
5301 " In this case, it's safer to repair by"
5302 " running 'gnt-instance stop' to ensure disk"
5303 " shutdown, and then restarting it.")
5305 if runningon_target:
5306 # the migration has actually succeeded, we need to update the config
5307 self.feedback_fn("* instance running on secondary node (%s),"
5308 " updating config" % target_node)
5309 instance.primary_node = target_node
5310 self.cfg.Update(instance, self.feedback_fn)
5311 demoted_node = source_node
5313 self.feedback_fn("* instance confirmed to be running on its"
5314 " primary node (%s)" % source_node)
5315 demoted_node = target_node
5317 self._EnsureSecondary(demoted_node)
5318     try:
5319       self._WaitUntilSync()
5320     except errors.OpExecError:
5321       # we ignore here errors, since if the device is standalone, it
5322       # won't be able to sync
5323       pass
5324     self._GoStandalone()
5325 self._GoReconnect(False)
5326 self._WaitUntilSync()
5328 self.feedback_fn("* done")
5330 def _RevertDiskStatus(self):
5331 """Try to revert the disk status after a failed migration.
5334 target_node = self.target_node
5335     try:
5336       self._EnsureSecondary(target_node)
5337 self._GoStandalone()
5338 self._GoReconnect(False)
5339 self._WaitUntilSync()
5340 except errors.OpExecError, err:
5341 self.lu.LogWarning("Migration failed and I can't reconnect the"
5342 " drives: error '%s'\n"
5343 "Please look and recover the instance status" %
5346 def _AbortMigration(self):
5347 """Call the hypervisor code to abort a started migration.
5350 instance = self.instance
5351 target_node = self.target_node
5352 migration_info = self.migration_info
5354     abort_result = self.rpc.call_finalize_migration(target_node,
5355                                                     instance,
5356                                                     migration_info,
5357                                                     False)
5358     abort_msg = abort_result.fail_msg
5359     if abort_msg:
5360       logging.error("Aborting migration failed on target node %s: %s",
5361 target_node, abort_msg)
5362     # Don't raise an exception here, as we still have to try to revert the
5363 # disk status, even if this step failed.
5365 def _ExecMigration(self):
5366 """Migrate an instance.
5368 The migrate is done by:
5369 - change the disks into dual-master mode
5370 - wait until disks are fully synchronized again
5371 - migrate the instance
5372 - change disks on the new secondary node (the old primary) to secondary
5373 - wait until disks are fully synchronized
5374 - change disks into single-master mode
5377 instance = self.instance
5378 target_node = self.target_node
5379 source_node = self.source_node
5381 self.feedback_fn("* checking disk consistency between source and target")
5382 for dev in instance.disks:
5383 if not _CheckDiskConsistency(self, dev, target_node, False):
5384 raise errors.OpExecError("Disk %s is degraded or not fully"
5385 " synchronized on target node,"
5386 " aborting migrate." % dev.iv_name)
5388 # First get the migration information from the remote node
5389 result = self.rpc.call_migration_info(source_node, instance)
5390     msg = result.fail_msg
5391     if msg:
5392       log_err = ("Failed fetching source migration information from %s: %s" %
5393                  (source_node, msg))
5394       logging.error(log_err)
5395       raise errors.OpExecError(log_err)
5397 self.migration_info = migration_info = result.payload
5399 # Then switch the disks to master/master mode
5400 self._EnsureSecondary(target_node)
5401 self._GoStandalone()
5402 self._GoReconnect(True)
5403 self._WaitUntilSync()
5405 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5406     result = self.rpc.call_accept_instance(target_node,
5407                                            instance,
5408                                            migration_info,
5409                                            self.nodes_ip[target_node])
5411     msg = result.fail_msg
5412     if msg:
5413 logging.error("Instance pre-migration failed, trying to revert"
5414 " disk status: %s", msg)
5415 self.feedback_fn("Pre-migration failed, aborting")
5416 self._AbortMigration()
5417 self._RevertDiskStatus()
5418 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5419 (instance.name, msg))
5421 self.feedback_fn("* migrating instance to %s" % target_node)
5423 result = self.rpc.call_instance_migrate(source_node, instance,
5424                                             self.nodes_ip[target_node],
5425                                             self.live)
5426     msg = result.fail_msg
5427     if msg:
5428 logging.error("Instance migration failed, trying to revert"
5429 " disk status: %s", msg)
5430 self.feedback_fn("Migration failed, aborting")
5431 self._AbortMigration()
5432 self._RevertDiskStatus()
5433 raise errors.OpExecError("Could not migrate instance %s: %s" %
5434 (instance.name, msg))
5437 instance.primary_node = target_node
5438 # distribute new instance config to the other nodes
5439 self.cfg.Update(instance, self.feedback_fn)
5441     result = self.rpc.call_finalize_migration(target_node,
5442                                               instance,
5443                                               migration_info,
5444                                               True)
5445     msg = result.fail_msg
5446     if msg:
5447       logging.error("Instance migration succeeded, but finalization failed:"
5448                     " %s", msg)
5449       raise errors.OpExecError("Could not finalize instance migration: %s" %
5450                                msg)
5452 self._EnsureSecondary(source_node)
5453 self._WaitUntilSync()
5454 self._GoStandalone()
5455 self._GoReconnect(False)
5456 self._WaitUntilSync()
5458 self.feedback_fn("* done")
5460 def Exec(self, feedback_fn):
5461 """Perform the migration.
5464 feedback_fn("Migrating instance %s" % self.instance.name)
5466 self.feedback_fn = feedback_fn
5468 self.source_node = self.instance.primary_node
5469 self.target_node = self.instance.secondary_nodes[0]
5470 self.all_nodes = [self.source_node, self.target_node]
5471     self.nodes_ip = {
5472       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5473       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5474       }
5476     if self.cleanup:
5477       return self._ExecCleanup()
5478     else:
5479       return self._ExecMigration()
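# DRBD state walk performed by _ExecMigration above, in order: demote the
# target to secondary, go standalone, reconnect dual-master, live-migrate,
# demote the old primary, then standalone again and reconnect single-master.
# _ExecCleanup replays the tail of this walk to recover a half-done
# migration.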
5482 def _CreateBlockDev(lu, node, instance, device, force_create,
5483                     info, force_open):
5484 """Create a tree of block devices on a given node.
5486   If this device type has to be created on secondaries, create it and
5487   all its children.
5489 If not, just recurse to children keeping the same 'force' value.
5491 @param lu: the lu on whose behalf we execute
5492 @param node: the node on which to create the device
5493 @type instance: L{objects.Instance}
5494 @param instance: the instance which owns the device
5495 @type device: L{objects.Disk}
5496 @param device: the device to create
5497 @type force_create: boolean
5498 @param force_create: whether to force creation of this device; this
5499     will be changed to True whenever we find a device which has
5500 CreateOnSecondary() attribute
5501 @param info: the extra 'metadata' we should attach to the device
5502 (this will be represented as a LVM tag)
5503 @type force_open: boolean
5504   @param force_open: this parameter will be passed to the
5505 L{backend.BlockdevCreate} function where it specifies
5506 whether we run on primary or not, and it affects both
5507     the child assembly and the device's own Open() execution
5510   if device.CreateOnSecondary():
5511     force_create = True
5513   if device.children:
5514     for child in device.children:
5515       _CreateBlockDev(lu, node, instance, child, force_create,
5516                       info, force_open)
5518   if not force_create:
5519     return
5521   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
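# Recursion sketch: for a DRBD8 device with two LV children, the DRBD node
# reports CreateOnSecondary() == True, so force_create is turned on for the
# whole subtree; the LV children are then created first and the DRBD device
# itself last, and _CreateDisks repeats this on every relevant node.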
5524 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5525 """Create a single block device on a given node.
5527   This will not recurse over children of the device, so they must be
5528   created in advance.
5530 @param lu: the lu on whose behalf we execute
5531 @param node: the node on which to create the device
5532 @type instance: L{objects.Instance}
5533 @param instance: the instance which owns the device
5534 @type device: L{objects.Disk}
5535 @param device: the device to create
5536 @param info: the extra 'metadata' we should attach to the device
5537 (this will be represented as a LVM tag)
5538 @type force_open: boolean
5539   @param force_open: this parameter will be passed to the
5540 L{backend.BlockdevCreate} function where it specifies
5541 whether we run on primary or not, and it affects both
5542     the child assembly and the device's own Open() execution
5545 lu.cfg.SetDiskID(device, node)
5546 result = lu.rpc.call_blockdev_create(node, device, device.size,
5547 instance.name, force_open, info)
5548 result.Raise("Can't create block device %s on"
5549 " node %s for instance %s" % (device, node, instance.name))
5550 if device.physical_id is None:
5551 device.physical_id = result.payload
5554 def _GenerateUniqueNames(lu, exts):
5555 """Generate a suitable LV name.
5557   This will generate a logical volume name for the given instance.
5559   """
5560   results = []
5561   for val in exts:
5562     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5563     results.append("%s%s" % (new_id, val))
5564   return results
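# Example (hypothetical ID): with exts=[".disk0"] and a generated unique ID
# of "d41d8c...", this returns ["d41d8c....disk0"]; the DRBD8 path in
# _GenerateDiskTemplate below derives "<prefix>_data" and "<prefix>_meta"
# LV names from such a result.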
5567 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5568                          p_minor, s_minor):
5569   """Generate a drbd8 device complete with its children.
5571   """
5572 port = lu.cfg.AllocatePort()
5573 vgname = lu.cfg.GetVGName()
5574 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5575 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5576 logical_id=(vgname, names[0]))
5577 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5578 logical_id=(vgname, names[1]))
5579 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5580                           logical_id=(primary, secondary, port,
5581                                       p_minor, s_minor,
5582                                       shared_secret),
5583                           children=[dev_data, dev_meta],
5584                           iv_name=iv_name)
5585   return drbd_dev
5588 def _GenerateDiskTemplate(lu, template_name,
5589 instance_name, primary_node,
5590 secondary_nodes, disk_info,
5591                           file_storage_dir, file_driver,
5592                           base_index):
5593   """Generate the entire disk layout for a given template type.
5595   """
5596   #TODO: compute space requirements
5598 vgname = lu.cfg.GetVGName()
5599 disk_count = len(disk_info)
5600   disks = []
5601   if template_name == constants.DT_DISKLESS:
5602     pass
5603 elif template_name == constants.DT_PLAIN:
5604 if len(secondary_nodes) != 0:
5605 raise errors.ProgrammerError("Wrong template configuration")
5607 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5608 for i in range(disk_count)])
5609 for idx, disk in enumerate(disk_info):
5610 disk_index = idx + base_index
5611 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5612 logical_id=(vgname, names[idx]),
5613 iv_name="disk/%d" % disk_index,
5615 disks.append(disk_dev)
5616 elif template_name == constants.DT_DRBD8:
5617 if len(secondary_nodes) != 1:
5618 raise errors.ProgrammerError("Wrong template configuration")
5619 remote_node = secondary_nodes[0]
5620 minors = lu.cfg.AllocateDRBDMinor(
5621 [primary_node, remote_node] * len(disk_info), instance_name)
5623     names = []
5624     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5625 for i in range(disk_count)]):
5626 names.append(lv_prefix + "_data")
5627 names.append(lv_prefix + "_meta")
5628 for idx, disk in enumerate(disk_info):
5629 disk_index = idx + base_index
5630 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5631 disk["size"], names[idx*2:idx*2+2],
5632 "disk/%d" % disk_index,
5633 minors[idx*2], minors[idx*2+1])
5634 disk_dev.mode = disk["mode"]
5635 disks.append(disk_dev)
5636 elif template_name == constants.DT_FILE:
5637 if len(secondary_nodes) != 0:
5638 raise errors.ProgrammerError("Wrong template configuration")
5640 for idx, disk in enumerate(disk_info):
5641 disk_index = idx + base_index
5642 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5643 iv_name="disk/%d" % disk_index,
5644 logical_id=(file_driver,
5645 "%s/disk%d" % (file_storage_dir,
5648 disks.append(disk_dev)
5650 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5654 def _GetInstanceInfoText(instance):
5655 """Compute that text that should be added to the disk's metadata.
5658 return "originstname+%s" % instance.name
5661 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5662 """Create all disks for an instance.
5664 This abstracts away some work from AddInstance.
5666 @type lu: L{LogicalUnit}
5667 @param lu: the logical unit on whose behalf we execute
5668 @type instance: L{objects.Instance}
5669 @param instance: the instance whose disks we should create
5670   @type to_skip: list
5671   @param to_skip: list of indices to skip
5672 @type target_node: string
5673 @param target_node: if passed, overrides the target node for creation
5675 @return: the success of the creation
5677   """
5678   info = _GetInstanceInfoText(instance)
5679 if target_node is None:
5680 pnode = instance.primary_node
5681     all_nodes = instance.all_nodes
5682   else:
5683     pnode = target_node
5684     all_nodes = [pnode]
5686 if instance.disk_template == constants.DT_FILE:
5687 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5688 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5690 result.Raise("Failed to create directory '%s' on"
5691 " node %s" % (file_storage_dir, pnode))
5693 # Note: this needs to be kept in sync with adding of disks in
5694 # LUSetInstanceParams
5695 for idx, device in enumerate(instance.disks):
5696     if to_skip and idx in to_skip:
5697       continue
5698 logging.info("Creating volume %s for instance %s",
5699 device.iv_name, instance.name)
5701 for node in all_nodes:
5702 f_create = node == pnode
5703 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5706 def _RemoveDisks(lu, instance, target_node=None):
5707 """Remove all disks for an instance.
5709 This abstracts away some work from `AddInstance()` and
5710 `RemoveInstance()`. Note that in case some of the devices couldn't
5711 be removed, the removal will continue with the other ones (compare
5712 with `_CreateDisks()`).
5714 @type lu: L{LogicalUnit}
5715 @param lu: the logical unit on whose behalf we execute
5716 @type instance: L{objects.Instance}
5717 @param instance: the instance whose disks we should remove
5718 @type target_node: string
5719 @param target_node: used to override the node on which to remove the disks
5721 @return: the success of the removal
5724 logging.info("Removing block devices for instance %s", instance.name)
5726   all_result = True
5727   for device in instance.disks:
5728     if target_node:
5729       edata = [(target_node, device)]
5730     else:
5731       edata = device.ComputeNodeTree(instance.primary_node)
5732 for node, disk in edata:
5733 lu.cfg.SetDiskID(disk, node)
5734 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5736 lu.LogWarning("Could not remove block device %s on node %s,"
5737 " continuing anyway: %s", device.iv_name, node, msg)
5740 if instance.disk_template == constants.DT_FILE:
5741 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5742     if target_node:
5743       tgt = target_node
5744     else:
5745       tgt = instance.primary_node
5746     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5747     if result.fail_msg:
5748       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5749                     file_storage_dir, instance.primary_node, result.fail_msg)
5750       all_result = False
5752   return all_result
5755 def _ComputeDiskSize(disk_template, disks):
5756 """Compute disk size requirements in the volume group
5759 # Required free disk space as a function of disk and swap space
5761 constants.DT_DISKLESS: None,
5762 constants.DT_PLAIN: sum(d["size"] for d in disks),
5763 # 128 MB are added for drbd metadata for each disk
5764 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5765     constants.DT_FILE: None,
5766   }
5768 if disk_template not in req_size_dict:
5769 raise errors.ProgrammerError("Disk template '%s' size requirement"
5770 " is unknown" % disk_template)
5772 return req_size_dict[disk_template]
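# Worked example: for DT_DRBD8 with disks of 10240 and 5120 MiB, the VG
# requirement is (10240 + 128) + (5120 + 128) = 15616 MiB, while
# DT_DISKLESS and DT_FILE need no VG space and map to None.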
5775 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5776 """Hypervisor parameter validation.
5778   This function abstracts the hypervisor parameter validation to be
5779 used in both instance create and instance modify.
5781 @type lu: L{LogicalUnit}
5782 @param lu: the logical unit for which we check
5783 @type nodenames: list
5784 @param nodenames: the list of nodes on which we should check
5785 @type hvname: string
5786 @param hvname: the name of the hypervisor we should use
5787 @type hvparams: dict
5788 @param hvparams: the parameters which we need to check
5789 @raise errors.OpPrereqError: if the parameters are not valid
5791 """
5792 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5793 hvname,
5794 hvparams)
5795 for node in nodenames:
5796 info = hvinfo[node]
5797 if info.offline:
5798 continue
5799 info.Raise("Hypervisor parameter validation failed on node %s" % node)
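# Usage sketch (hypothetical node names): instance create validates its
# hypervisor parameters on the primary and secondary nodes along the lines of
#
#   _CheckHVParams(self, ["node1.example.com", "node2.example.com"],
#                  self.op.hypervisor, self.op.hvparams)
#
# where offline nodes are skipped rather than treated as validation failures.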
5802 class LUCreateInstance(LogicalUnit):
5803 """Create an instance.
5806 HPATH = "instance-add"
5807 HTYPE = constants.HTYPE_INSTANCE
5808 _OP_REQP = ["instance_name", "disks", "disk_template",
5810 "wait_for_sync", "ip_check", "nics",
5811 "hvparams", "beparams"]
5814 def CheckArguments(self):
5818 # set optional parameters to none if they don't exist
5819 for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5820 if not hasattr(self.op, attr):
5821 setattr(self.op, attr, None)
5823 # do not require name_check to ease forward/backward compatibility
5825 if not hasattr(self.op, "name_check"):
5826 self.op.name_check = True
5827 # validate/normalize the instance name
5828 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
5829 if self.op.ip_check and not self.op.name_check:
5830 # TODO: make the ip check more flexible and not depend on the name check
5831 raise errors.OpPrereqError("Cannot do ip checks without a name check",
5833 if (self.op.disk_template == constants.DT_FILE and
5834 not constants.ENABLE_FILE_STORAGE):
5835 raise errors.OpPrereqError("File storage disabled at configure time",
5837 # check disk information: either all adopt, or no adopt
5838 has_adopt = has_no_adopt = False
5839 for disk in self.op.disks:
5840 if "adopt" in disk:
5841 has_adopt = True
5842 else:
5843 has_no_adopt = True
5844 if has_adopt and has_no_adopt:
5845 raise errors.OpPrereqError("Either all disks have are adoped or none is",
5848 if self.op.disk_template != constants.DT_PLAIN:
5849 raise errors.OpPrereqError("Disk adoption is only supported for the"
5850 " 'plain' disk template",
5852 if self.op.iallocator is not None:
5853 raise errors.OpPrereqError("Disk adoption not allowed with an"
5854 " iallocator script", errors.ECODE_INVAL)
5855 if self.op.mode == constants.INSTANCE_IMPORT:
5856 raise errors.OpPrereqError("Disk adoption not allowed for"
5857 " instance import", errors.ECODE_INVAL)
5859 self.adopt_disks = has_adopt
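# For illustration (hypothetical opcode input): a request with
#   disks=[{"adopt": "data-lv1"}, {"adopt": "data-lv2"}]
# switches the LU into adoption mode, while a mixed list such as
#   disks=[{"adopt": "data-lv1"}, {"size": 1024}]
# is rejected by the all-or-nothing check above.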
5861 def ExpandNames(self):
5862 """ExpandNames for CreateInstance.
5864 Figure out the right locks for instance creation.
5866 """
5867 self.needed_locks = {}
5869 # cheap checks, mostly valid constants given
5871 # verify creation mode
5872 if self.op.mode not in (constants.INSTANCE_CREATE,
5873 constants.INSTANCE_IMPORT):
5874 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5875 self.op.mode, errors.ECODE_INVAL)
5877 # disk template and mirror node verification
5878 _CheckDiskTemplate(self.op.disk_template)
5880 if self.op.hypervisor is None:
5881 self.op.hypervisor = self.cfg.GetHypervisorType()
5883 cluster = self.cfg.GetClusterInfo()
5884 enabled_hvs = cluster.enabled_hypervisors
5885 if self.op.hypervisor not in enabled_hvs:
5886 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5887 " cluster (%s)" % (self.op.hypervisor,
5888 ",".join(enabled_hvs)),
5891 # check hypervisor parameter syntax (locally)
5892 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5893 filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5894 self.op.hvparams)
5895 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5896 hv_type.CheckParameterSyntax(filled_hvp)
5897 self.hv_full = filled_hvp
5898 # check that we don't specify global parameters on an instance
5899 _CheckGlobalHvParams(self.op.hvparams)
5901 # fill and remember the beparams dict
5902 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5903 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5904 self.op.beparams)
5906 #### instance parameters check
5908 # instance name verification
5909 if self.op.name_check:
5910 hostname1 = utils.GetHostInfo(self.op.instance_name)
5911 self.op.instance_name = instance_name = hostname1.name
5912 # used in CheckPrereq for ip ping check
5913 self.check_ip = hostname1.ip
5914 else:
5915 instance_name = self.op.instance_name
5916 self.check_ip = None
5918 # this is just a preventive check, but someone might still add this
5919 # instance in the meantime, and creation will fail at lock-add time
5920 if instance_name in self.cfg.GetInstanceList():
5921 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5922 instance_name, errors.ECODE_EXISTS)
5924 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5926 # NIC buildup
5927 self.nics = []
5928 for idx, nic in enumerate(self.op.nics):
5929 nic_mode_req = nic.get("mode", None)
5930 nic_mode = nic_mode_req
5931 if nic_mode is None:
5932 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5934 # in routed mode, for the first nic, the default ip is 'auto'
5935 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5936 default_ip_mode = constants.VALUE_AUTO
5937 else:
5938 default_ip_mode = constants.VALUE_NONE
5940 # ip validity checks
5941 ip = nic.get("ip", default_ip_mode)
5942 if ip is None or ip.lower() == constants.VALUE_NONE:
5943 nic_ip = None
5944 elif ip.lower() == constants.VALUE_AUTO:
5945 if not self.op.name_check:
5946 raise errors.OpPrereqError("IP address set to auto but name checks"
5947 " have been skipped. Aborting.",
5948 errors.ECODE_INVAL)
5949 nic_ip = hostname1.ip
5950 else:
5951 if not utils.IsValidIP(ip):
5952 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5953 " like a valid IP" % ip,
5954 errors.ECODE_INVAL)
5955 nic_ip = ip
5957 # TODO: check the ip address for uniqueness
5958 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5959 raise errors.OpPrereqError("Routed nic mode requires an ip address",
5962 # MAC address verification
5963 mac = nic.get("mac", constants.VALUE_AUTO)
5964 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5965 mac = utils.NormalizeAndValidateMac(mac)
5967 try:
5968 self.cfg.ReserveMAC(mac, self.proc.GetECId())
5969 except errors.ReservationError:
5970 raise errors.OpPrereqError("MAC address %s already in use"
5971 " in cluster" % mac,
5972 errors.ECODE_NOTUNIQUE)
5974 # bridge verification
5975 bridge = nic.get("bridge", None)
5976 link = nic.get("link", None)
5977 if bridge and link:
5978 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5979 " at the same time", errors.ECODE_INVAL)
5980 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5981 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
5982 errors.ECODE_INVAL)
5983 elif bridge:
5984 link = bridge
5986 nicparams = {}
5987 if nic_mode_req:
5988 nicparams[constants.NIC_MODE] = nic_mode_req
5989 if link:
5990 nicparams[constants.NIC_LINK] = link
5992 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5993 nicparams)
5994 objects.NIC.CheckParameterSyntax(check_params)
5995 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
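# Illustrative opcode input (hypothetical values): the loop above accepts,
# for example,
#   nics=[{"mode": "bridged", "link": "xen-br0", "mac": "auto"},
#         {"mode": "routed", "ip": "198.51.100.10"}]
# where the first NIC gets a generated MAC and no IP, and the second one is
# routed and therefore must carry an IP address.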
5997 # disk checks/pre-build
5998 self.disks = []
5999 for disk in self.op.disks:
6000 mode = disk.get("mode", constants.DISK_RDWR)
6001 if mode not in constants.DISK_ACCESS_SET:
6002 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6003 mode, errors.ECODE_INVAL)
6004 size = disk.get("size", None)
6005 if size is None:
6006 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6007 try:
6008 size = int(size)
6009 except (TypeError, ValueError):
6010 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6011 errors.ECODE_INVAL)
6012 new_disk = {"size": size, "mode": mode}
6013 if "adopt" in disk:
6014 new_disk["adopt"] = disk["adopt"]
6015 self.disks.append(new_disk)
6017 # file storage checks
6018 if (self.op.file_driver and
6019 not self.op.file_driver in constants.FILE_DRIVER):
6020 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6021 self.op.file_driver, errors.ECODE_INVAL)
6023 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6024 raise errors.OpPrereqError("File storage directory path not absolute",
6027 ### Node/iallocator related checks
6028 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6029 raise errors.OpPrereqError("One and only one of iallocator and primary"
6030 " node must be given",
6033 if self.op.iallocator:
6034 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6035 else:
6036 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6037 nodelist = [self.op.pnode]
6038 if self.op.snode is not None:
6039 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6040 nodelist.append(self.op.snode)
6041 self.needed_locks[locking.LEVEL_NODE] = nodelist
6043 # in case of import lock the source node too
6044 if self.op.mode == constants.INSTANCE_IMPORT:
6045 src_node = getattr(self.op, "src_node", None)
6046 src_path = getattr(self.op, "src_path", None)
6048 if src_path is None:
6049 self.op.src_path = src_path = self.op.instance_name
6051 if src_node is None:
6052 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6053 self.op.src_node = None
6054 if os.path.isabs(src_path):
6055 raise errors.OpPrereqError("Importing an instance from an absolute"
6056 " path requires a source node option.",
6059 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6060 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6061 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6062 if not os.path.isabs(src_path):
6063 self.op.src_path = src_path = \
6064 utils.PathJoin(constants.EXPORT_DIR, src_path)
6066 # On import force_variant must be True, because if we forced it at
6067 # initial install, our only chance when importing it back is that it
6068 # works again!
6069 self.op.force_variant = True
6071 else: # INSTANCE_CREATE
6072 if getattr(self.op, "os_type", None) is None:
6073 raise errors.OpPrereqError("No guest OS specified",
6075 self.op.force_variant = getattr(self.op, "force_variant", False)
6077 def _RunAllocator(self):
6078 """Run the allocator based on input opcode.
6081 nics = [n.ToDict() for n in self.nics]
6082 ial = IAllocator(self.cfg, self.rpc,
6083 mode=constants.IALLOCATOR_MODE_ALLOC,
6084 name=self.op.instance_name,
6085 disk_template=self.op.disk_template,
6086 tags=[],
6087 os=self.op.os_type,
6088 vcpus=self.be_full[constants.BE_VCPUS],
6089 mem_size=self.be_full[constants.BE_MEMORY],
6090 disks=self.disks,
6091 nics=nics,
6092 hypervisor=self.op.hypervisor,
6093 )
6095 ial.Run(self.op.iallocator)
6097 if not ial.success:
6098 raise errors.OpPrereqError("Can't compute nodes using"
6099 " iallocator '%s': %s" %
6100 (self.op.iallocator, ial.info),
6101 errors.ECODE_NORES)
6102 if len(ial.result) != ial.required_nodes:
6103 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6104 " of nodes (%s), required %s" %
6105 (self.op.iallocator, len(ial.result),
6106 ial.required_nodes), errors.ECODE_FAULT)
6107 self.op.pnode = ial.result[0]
6108 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6109 self.op.instance_name, self.op.iallocator,
6110 utils.CommaJoin(ial.result))
6111 if ial.required_nodes == 2:
6112 self.op.snode = ial.result[1]
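# For a mirrored (DRBD8) template the allocator must return two nodes, so a
# successful run might yield (hypothetical names):
#   ial.result == ["node1.example.com", "node2.example.com"]
# with ial.result[0] becoming the primary and ial.result[1] the secondary.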
6114 def BuildHooksEnv(self):
6115 """Build hooks env.
6117 This runs on master, primary and secondary nodes of the instance.
6119 """
6120 env = {
6121 "ADD_MODE": self.op.mode,
6122 }
6123 if self.op.mode == constants.INSTANCE_IMPORT:
6124 env["SRC_NODE"] = self.op.src_node
6125 env["SRC_PATH"] = self.op.src_path
6126 env["SRC_IMAGES"] = self.src_images
6128 env.update(_BuildInstanceHookEnv(
6129 name=self.op.instance_name,
6130 primary_node=self.op.pnode,
6131 secondary_nodes=self.secondaries,
6132 status=self.op.start,
6133 os_type=self.op.os_type,
6134 memory=self.be_full[constants.BE_MEMORY],
6135 vcpus=self.be_full[constants.BE_VCPUS],
6136 nics=_NICListToTuple(self, self.nics),
6137 disk_template=self.op.disk_template,
6138 disks=[(d["size"], d["mode"]) for d in self.disks],
6139 bep=self.be_full,
6140 hvp=self.hv_full,
6141 hypervisor_name=self.op.hypervisor,
6142 ))
6144 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6145 self.secondaries)
6146 return env, nl, nl
6148 def CheckPrereq(self):
6149 """Check prerequisites.
6152 if (not self.cfg.GetVGName() and
6153 self.op.disk_template not in constants.DTS_NOT_LVM):
6154 raise errors.OpPrereqError("Cluster does not support lvm-based"
6155 " instances", errors.ECODE_STATE)
6157 if self.op.mode == constants.INSTANCE_IMPORT:
6158 src_node = self.op.src_node
6159 src_path = self.op.src_path
6161 if src_node is None:
6162 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6163 exp_list = self.rpc.call_export_list(locked_nodes)
6164 found = False
6165 for node in exp_list:
6166 if exp_list[node].fail_msg:
6167 continue
6168 if src_path in exp_list[node].payload:
6169 found = True
6170 self.op.src_node = src_node = node
6171 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6172 src_path)
6173 break
6174 if not found:
6175 raise errors.OpPrereqError("No export found for relative path %s" %
6176 src_path, errors.ECODE_INVAL)
6178 _CheckNodeOnline(self, src_node)
6179 result = self.rpc.call_export_info(src_node, src_path)
6180 result.Raise("No export or invalid export found in dir %s" % src_path)
6182 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6183 if not export_info.has_section(constants.INISECT_EXP):
6184 raise errors.ProgrammerError("Corrupted export config",
6185 errors.ECODE_ENVIRON)
6187 ei_version = export_info.get(constants.INISECT_EXP, 'version')
6188 if (int(ei_version) != constants.EXPORT_VERSION):
6189 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6190 (ei_version, constants.EXPORT_VERSION),
6191 errors.ECODE_ENVIRON)
6193 # Check that the new instance doesn't have less disks than the export
6194 instance_disks = len(self.disks)
6195 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6196 if instance_disks < export_disks:
6197 raise errors.OpPrereqError("Not enough disks to import."
6198 " (instance: %d, export: %d)" %
6199 (instance_disks, export_disks),
6200 errors.ECODE_INVAL)
6202 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6203 disk_images = []
6204 for idx in range(export_disks):
6205 option = 'disk%d_dump' % idx
6206 if export_info.has_option(constants.INISECT_INS, option):
6207 # FIXME: are the old os-es, disk sizes, etc. useful?
6208 export_name = export_info.get(constants.INISECT_INS, option)
6209 image = utils.PathJoin(src_path, export_name)
6210 disk_images.append(image)
6211 else:
6212 disk_images.append(False)
6214 self.src_images = disk_images
6216 old_name = export_info.get(constants.INISECT_INS, 'name')
6217 # FIXME: int() here could throw a ValueError on broken exports
6218 exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6219 if self.op.instance_name == old_name:
6220 for idx, nic in enumerate(self.nics):
6221 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6222 nic_mac_ini = 'nic%d_mac' % idx
6223 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6225 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6227 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6228 if self.op.ip_check:
6229 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6230 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6231 (self.check_ip, self.op.instance_name),
6232 errors.ECODE_NOTUNIQUE)
6234 #### mac address generation
6235 # By generating here the mac address both the allocator and the hooks get
6236 # the real final mac address rather than the 'auto' or 'generate' value.
6237 # There is a race condition between the generation and the instance object
6238 # creation, which means that we know the mac is valid now, but we're not
6239 # sure it will be when we actually add the instance. If things go bad
6240 # adding the instance will abort because of a duplicate mac, and the
6241 # creation job will fail.
6242 for nic in self.nics:
6243 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6244 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6248 if self.op.iallocator is not None:
6249 self._RunAllocator()
6251 #### node related checks
6253 # check primary node
6254 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6255 assert self.pnode is not None, \
6256 "Cannot retrieve locked node %s" % self.op.pnode
6258 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6259 pnode.name, errors.ECODE_STATE)
6261 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6262 pnode.name, errors.ECODE_STATE)
6264 self.secondaries = []
6266 # mirror node verification
6267 if self.op.disk_template in constants.DTS_NET_MIRROR:
6268 if self.op.snode is None:
6269 raise errors.OpPrereqError("The networked disk templates need"
6270 " a mirror node", errors.ECODE_INVAL)
6271 if self.op.snode == pnode.name:
6272 raise errors.OpPrereqError("The secondary node cannot be the"
6273 " primary node.", errors.ECODE_INVAL)
6274 _CheckNodeOnline(self, self.op.snode)
6275 _CheckNodeNotDrained(self, self.op.snode)
6276 self.secondaries.append(self.op.snode)
6278 nodenames = [pnode.name] + self.secondaries
6280 req_size = _ComputeDiskSize(self.op.disk_template,
6281 self.disks)
6283 # Check lv size requirements, if not adopting
6284 if req_size is not None and not self.adopt_disks:
6285 _CheckNodesFreeDisk(self, nodenames, req_size)
6287 if self.adopt_disks: # instead, we must check the adoption data
6288 all_lvs = set([i["adopt"] for i in self.disks])
6289 if len(all_lvs) != len(self.disks):
6290 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6292 for lv_name in all_lvs:
6294 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6295 except errors.ReservationError:
6296 raise errors.OpPrereqError("LV named %s used by another instance" %
6297 lv_name, errors.ECODE_NOTUNIQUE)
6299 node_lvs = self.rpc.call_lv_list([pnode.name],
6300 self.cfg.GetVGName())[pnode.name]
6301 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6302 node_lvs = node_lvs.payload
6303 delta = all_lvs.difference(node_lvs.keys())
6304 if delta:
6305 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6306 utils.CommaJoin(delta),
6307 errors.ECODE_INVAL)
6308 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6309 if online_lvs:
6310 raise errors.OpPrereqError("Online logical volumes found, cannot"
6311 " adopt: %s" % utils.CommaJoin(online_lvs),
6312 errors.ECODE_STATE)
6313 # update the size of disk based on what is found
6314 for dsk in self.disks:
6315 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6317 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6320 result = self.rpc.call_os_get(pnode.name, self.op.os_type)
6321 result.Raise("OS '%s' not in supported os list for primary node %s" %
6322 (self.op.os_type, pnode.name),
6323 prereq=True, ecode=errors.ECODE_INVAL)
6324 if not self.op.force_variant:
6325 _CheckOSVariant(result.payload, self.op.os_type)
6327 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6329 # memory check on primary node
6330 if self.op.start:
6331 _CheckNodeFreeMemory(self, self.pnode.name,
6332 "creating instance %s" % self.op.instance_name,
6333 self.be_full[constants.BE_MEMORY],
6334 self.op.hypervisor)
6336 self.dry_run_result = list(nodenames)
6338 def Exec(self, feedback_fn):
6339 """Create and add the instance to the cluster.
6342 instance = self.op.instance_name
6343 pnode_name = self.pnode.name
6345 ht_kind = self.op.hypervisor
6346 if ht_kind in constants.HTS_REQ_PORT:
6347 network_port = self.cfg.AllocatePort()
6348 else:
6349 network_port = None
6351 # this is needed because os.path.join does not accept None arguments
6352 if self.op.file_storage_dir is None:
6353 string_file_storage_dir = ""
6354 else:
6355 string_file_storage_dir = self.op.file_storage_dir
6357 # build the full file storage dir path
6358 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6359 string_file_storage_dir, instance)
6361 disks = _GenerateDiskTemplate(self,
6362 self.op.disk_template,
6363 instance, pnode_name,
6364 self.secondaries,
6365 self.disks,
6366 file_storage_dir,
6367 self.op.file_driver,
6368 0)
6370 iobj = objects.Instance(name=instance, os=self.op.os_type,
6371 primary_node=pnode_name,
6372 nics=self.nics, disks=disks,
6373 disk_template=self.op.disk_template,
6374 admin_up=False,
6375 network_port=network_port,
6376 beparams=self.op.beparams,
6377 hvparams=self.op.hvparams,
6378 hypervisor=self.op.hypervisor,
6379 )
6381 if self.adopt_disks:
6382 # rename LVs to the newly-generated names; we need to construct
6383 # 'fake' LV disks with the old data, plus the new unique_id
6384 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6385 rename_to = []
6386 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6387 rename_to.append(t_dsk.logical_id)
6388 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6389 self.cfg.SetDiskID(t_dsk, pnode_name)
6390 result = self.rpc.call_blockdev_rename(pnode_name,
6391 zip(tmp_disks, rename_to))
6392 result.Raise("Failed to rename adoped LVs")
6394 feedback_fn("* creating instance disks...")
6396 _CreateDisks(self, iobj)
6397 except errors.OpExecError:
6398 self.LogWarning("Device creation failed, reverting...")
6400 _RemoveDisks(self, iobj)
6402 self.cfg.ReleaseDRBDMinors(instance)
6405 feedback_fn("adding instance %s to cluster config" % instance)
6407 self.cfg.AddInstance(iobj, self.proc.GetECId())
6409 # Declare that we don't want to remove the instance lock anymore, as we've
6410 # added the instance to the config
6411 del self.remove_locks[locking.LEVEL_INSTANCE]
6412 # Unlock all the nodes
6413 if self.op.mode == constants.INSTANCE_IMPORT:
6414 nodes_keep = [self.op.src_node]
6415 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6416 if node != self.op.src_node]
6417 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6418 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6419 else:
6420 self.context.glm.release(locking.LEVEL_NODE)
6421 del self.acquired_locks[locking.LEVEL_NODE]
6423 if self.op.wait_for_sync:
6424 disk_abort = not _WaitForSync(self, iobj)
6425 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6426 # make sure the disks are not degraded (still sync-ing is ok)
6428 feedback_fn("* checking mirrors status")
6429 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6434 _RemoveDisks(self, iobj)
6435 self.cfg.RemoveInstance(iobj.name)
6436 # Make sure the instance lock gets removed
6437 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6438 raise errors.OpExecError("There are some degraded disks for"
6441 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6442 if self.op.mode == constants.INSTANCE_CREATE:
6443 feedback_fn("* running the instance OS create scripts...")
6444 # FIXME: pass debug option from opcode to backend
6445 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6446 self.op.debug_level)
6447 result.Raise("Could not add os for instance %s"
6448 " on node %s" % (instance, pnode_name))
6450 elif self.op.mode == constants.INSTANCE_IMPORT:
6451 feedback_fn("* running the instance OS import scripts...")
6452 src_node = self.op.src_node
6453 src_images = self.src_images
6454 cluster_name = self.cfg.GetClusterName()
6455 # FIXME: pass debug option from opcode to backend
6456 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6457 src_node, src_images,
6458 cluster_name,
6459 self.op.debug_level)
6460 msg = import_result.fail_msg
6461 if msg:
6462 self.LogWarning("Error while importing the disk images for instance"
6463 " %s on node %s: %s" % (instance, pnode_name, msg))
6464 else:
6465 # also checked in the prereq part
6466 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6467 % self.op.mode)
6469 if self.op.start:
6470 iobj.admin_up = True
6471 self.cfg.Update(iobj, feedback_fn)
6472 logging.info("Starting instance %s on node %s", instance, pnode_name)
6473 feedback_fn("* starting instance...")
6474 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6475 result.Raise("Could not start instance")
6477 return list(iobj.all_nodes)
6480 class LUConnectConsole(NoHooksLU):
6481 """Connect to an instance's console.
6483 This is somewhat special in that it returns the command line that
6484 you need to run on the master node in order to connect to the
6488 _OP_REQP = ["instance_name"]
6491 def ExpandNames(self):
6492 self._ExpandAndLockInstance()
6494 def CheckPrereq(self):
6495 """Check prerequisites.
6497 This checks that the instance is in the cluster.
6499 """
6500 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6501 assert self.instance is not None, \
6502 "Cannot retrieve locked instance %s" % self.op.instance_name
6503 _CheckNodeOnline(self, self.instance.primary_node)
6505 def Exec(self, feedback_fn):
6506 """Connect to the console of an instance
6509 instance = self.instance
6510 node = instance.primary_node
6512 node_insts = self.rpc.call_instance_list([node],
6513 [instance.hypervisor])[node]
6514 node_insts.Raise("Can't get node information from %s" % node)
6516 if instance.name not in node_insts.payload:
6517 raise errors.OpExecError("Instance %s is not running." % instance.name)
6519 logging.debug("Connecting to console of %s on %s", instance.name, node)
6521 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6522 cluster = self.cfg.GetClusterInfo()
6523 # beparams and hvparams are passed separately, to avoid editing the
6524 # instance and then saving the defaults in the instance itself.
6525 hvparams = cluster.FillHV(instance)
6526 beparams = cluster.FillBE(instance)
6527 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6530 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
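# The return value is the SSH command line to run on the master node; for a
# Xen instance it would look roughly like (hypothetical values):
#   ssh -t ... root@node1.example.com "xm console instance1.example.com"
# i.e. the hypervisor-specific console command wrapped in an interactive
# ssh invocation.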
6533 class LUReplaceDisks(LogicalUnit):
6534 """Replace the disks of an instance.
6537 HPATH = "mirrors-replace"
6538 HTYPE = constants.HTYPE_INSTANCE
6539 _OP_REQP = ["instance_name", "mode", "disks"]
6542 def CheckArguments(self):
6543 if not hasattr(self.op, "remote_node"):
6544 self.op.remote_node = None
6545 if not hasattr(self.op, "iallocator"):
6546 self.op.iallocator = None
6547 if not hasattr(self.op, "early_release"):
6548 self.op.early_release = False
6550 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6551 self.op.iallocator)
6553 def ExpandNames(self):
6554 self._ExpandAndLockInstance()
6556 if self.op.iallocator is not None:
6557 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6559 elif self.op.remote_node is not None:
6560 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6561 self.op.remote_node = remote_node
6563 # Warning: do not remove the locking of the new secondary here
6564 # unless DRBD8.AddChildren is changed to work in parallel;
6565 # currently it doesn't since parallel invocations of
6566 # FindUnusedMinor will conflict
6567 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6568 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6570 else:
6571 self.needed_locks[locking.LEVEL_NODE] = []
6572 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6574 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6575 self.op.iallocator, self.op.remote_node,
6576 self.op.disks, False, self.op.early_release)
6578 self.tasklets = [self.replacer]
6580 def DeclareLocks(self, level):
6581 # If we're not already locking all nodes in the set we have to declare the
6582 # instance's primary/secondary nodes.
6583 if (level == locking.LEVEL_NODE and
6584 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6585 self._LockInstancesNodes()
6587 def BuildHooksEnv(self):
6588 """Build hooks env.
6590 This runs on the master, the primary and all the secondaries.
6592 """
6593 instance = self.replacer.instance
6594 env = {
6595 "MODE": self.op.mode,
6596 "NEW_SECONDARY": self.op.remote_node,
6597 "OLD_SECONDARY": instance.secondary_nodes[0],
6598 }
6599 env.update(_BuildInstanceHookEnvByObject(self, instance))
6600 nl = [
6601 self.cfg.GetMasterNode(),
6602 instance.primary_node,
6603 ]
6604 if self.op.remote_node is not None:
6605 nl.append(self.op.remote_node)
6606 return env, nl, nl
6609 class LUEvacuateNode(LogicalUnit):
6610 """Relocate the secondary instances from a node.
6613 HPATH = "node-evacuate"
6614 HTYPE = constants.HTYPE_NODE
6615 _OP_REQP = ["node_name"]
6618 def CheckArguments(self):
6619 if not hasattr(self.op, "remote_node"):
6620 self.op.remote_node = None
6621 if not hasattr(self.op, "iallocator"):
6622 self.op.iallocator = None
6623 if not hasattr(self.op, "early_release"):
6624 self.op.early_release = False
6626 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6627 self.op.remote_node,
6628 self.op.iallocator)
6630 def ExpandNames(self):
6631 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6633 self.needed_locks = {}
6635 # Declare node locks
6636 if self.op.iallocator is not None:
6637 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6639 elif self.op.remote_node is not None:
6640 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6642 # Warning: do not remove the locking of the new secondary here
6643 # unless DRBD8.AddChildren is changed to work in parallel;
6644 # currently it doesn't since parallel invocations of
6645 # FindUnusedMinor will conflict
6646 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6647 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6650 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6652 # Create tasklets for replacing disks for all secondary instances on this
6653 # node
6654 names = []
6655 tasklets = []
6657 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6658 logging.debug("Replacing disks for instance %s", inst.name)
6659 names.append(inst.name)
6661 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6662 self.op.iallocator, self.op.remote_node, [],
6663 True, self.op.early_release)
6664 tasklets.append(replacer)
6666 self.tasklets = tasklets
6667 self.instance_names = names
6669 # Declare instance locks
6670 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6672 def DeclareLocks(self, level):
6673 # If we're not already locking all nodes in the set we have to declare the
6674 # instance's primary/secondary nodes.
6675 if (level == locking.LEVEL_NODE and
6676 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6677 self._LockInstancesNodes()
6679 def BuildHooksEnv(self):
6680 """Build hooks env.
6682 This runs on the master, the primary and all the secondaries.
6684 """
6685 env = {
6686 "NODE_NAME": self.op.node_name,
6687 }
6689 nl = [self.cfg.GetMasterNode()]
6691 if self.op.remote_node is not None:
6692 env["NEW_SECONDARY"] = self.op.remote_node
6693 nl.append(self.op.remote_node)
6695 return (env, nl, nl)
6698 class TLReplaceDisks(Tasklet):
6699 """Replaces disks for an instance.
6701 Note: Locking is not within the scope of this class.
6703 """
6704 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6705 disks, delay_iallocator, early_release):
6706 """Initializes this class.
6709 Tasklet.__init__(self, lu)
6711 # Parameters
6712 self.instance_name = instance_name
6713 self.mode = mode
6714 self.iallocator_name = iallocator_name
6715 self.remote_node = remote_node
6716 self.disks = disks
6717 self.delay_iallocator = delay_iallocator
6718 self.early_release = early_release
6720 # Runtime data
6721 self.instance = None
6722 self.new_node = None
6723 self.target_node = None
6724 self.other_node = None
6725 self.remote_node_info = None
6726 self.node_secondary_ip = None
6728 @staticmethod
6729 def CheckArguments(mode, remote_node, iallocator):
6730 """Helper function for users of this class.
6733 # check for valid parameter combination
6734 if mode == constants.REPLACE_DISK_CHG:
6735 if remote_node is None and iallocator is None:
6736 raise errors.OpPrereqError("When changing the secondary either an"
6737 " iallocator script must be used or the"
6738 " new node given", errors.ECODE_INVAL)
6740 if remote_node is not None and iallocator is not None:
6741 raise errors.OpPrereqError("Give either the iallocator or the new"
6742 " secondary, not both", errors.ECODE_INVAL)
6744 elif remote_node is not None or iallocator is not None:
6745 # Not replacing the secondary
6746 raise errors.OpPrereqError("The iallocator and new node options can"
6747 " only be used when changing the"
6748 " secondary node", errors.ECODE_INVAL)
6750 @staticmethod
6751 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6752 """Compute a new secondary node using an IAllocator.
6755 ial = IAllocator(lu.cfg, lu.rpc,
6756 mode=constants.IALLOCATOR_MODE_RELOC,
6757 name=instance_name,
6758 relocate_from=relocate_from)
6760 ial.Run(iallocator_name)
6762 if not ial.success:
6763 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6764 " %s" % (iallocator_name, ial.info),
6765 errors.ECODE_NORES)
6767 if len(ial.result) != ial.required_nodes:
6768 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6769 " of nodes (%s), required %s" %
6770 (iallocator_name,
6771 len(ial.result), ial.required_nodes),
6772 errors.ECODE_FAULT)
6774 remote_node_name = ial.result[0]
6776 lu.LogInfo("Selected new secondary for instance '%s': %s",
6777 instance_name, remote_node_name)
6779 return remote_node_name
6781 def _FindFaultyDisks(self, node_name):
6782 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6783 node_name, True)
6785 def CheckPrereq(self):
6786 """Check prerequisites.
6788 This checks that the instance is in the cluster.
6790 """
6791 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6792 assert instance is not None, \
6793 "Cannot retrieve locked instance %s" % self.instance_name
6795 if instance.disk_template != constants.DT_DRBD8:
6796 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6797 " instances", errors.ECODE_INVAL)
6799 if len(instance.secondary_nodes) != 1:
6800 raise errors.OpPrereqError("The instance has a strange layout,"
6801 " expected one secondary but found %d" %
6802 len(instance.secondary_nodes),
6803 errors.ECODE_STATE)
6805 if not self.delay_iallocator:
6806 self._CheckPrereq2()
6808 def _CheckPrereq2(self):
6809 """Check prerequisites, second part.
6811 This function should always be part of CheckPrereq. It was separated and is
6812 now called from Exec because during node evacuation iallocator was only
6813 called with an unmodified cluster model, not taking planned changes into
6814 account.
6816 """
6817 instance = self.instance
6818 secondary_node = instance.secondary_nodes[0]
6820 if self.iallocator_name is None:
6821 remote_node = self.remote_node
6822 else:
6823 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6824 instance.name, instance.secondary_nodes)
6826 if remote_node is not None:
6827 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6828 assert self.remote_node_info is not None, \
6829 "Cannot retrieve locked node %s" % remote_node
6830 else:
6831 self.remote_node_info = None
6833 if remote_node == self.instance.primary_node:
6834 raise errors.OpPrereqError("The specified node is the primary node of"
6835 " the instance.", errors.ECODE_INVAL)
6837 if remote_node == secondary_node:
6838 raise errors.OpPrereqError("The specified node is already the"
6839 " secondary node of the instance.",
6842 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6843 constants.REPLACE_DISK_CHG):
6844 raise errors.OpPrereqError("Cannot specify disks to be replaced",
6847 if self.mode == constants.REPLACE_DISK_AUTO:
6848 faulty_primary = self._FindFaultyDisks(instance.primary_node)
6849 faulty_secondary = self._FindFaultyDisks(secondary_node)
6851 if faulty_primary and faulty_secondary:
6852 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6853 " one node and can not be repaired"
6854 " automatically" % self.instance_name,
6858 self.disks = faulty_primary
6859 self.target_node = instance.primary_node
6860 self.other_node = secondary_node
6861 check_nodes = [self.target_node, self.other_node]
6862 elif faulty_secondary:
6863 self.disks = faulty_secondary
6864 self.target_node = secondary_node
6865 self.other_node = instance.primary_node
6866 check_nodes = [self.target_node, self.other_node]
6867 else:
6868 self.disks = []
6869 check_nodes = []
6871 else:
6872 # Non-automatic modes
6873 if self.mode == constants.REPLACE_DISK_PRI:
6874 self.target_node = instance.primary_node
6875 self.other_node = secondary_node
6876 check_nodes = [self.target_node, self.other_node]
6878 elif self.mode == constants.REPLACE_DISK_SEC:
6879 self.target_node = secondary_node
6880 self.other_node = instance.primary_node
6881 check_nodes = [self.target_node, self.other_node]
6883 elif self.mode == constants.REPLACE_DISK_CHG:
6884 self.new_node = remote_node
6885 self.other_node = instance.primary_node
6886 self.target_node = secondary_node
6887 check_nodes = [self.new_node, self.other_node]
6889 _CheckNodeNotDrained(self.lu, remote_node)
6891 old_node_info = self.cfg.GetNodeInfo(secondary_node)
6892 assert old_node_info is not None
6893 if old_node_info.offline and not self.early_release:
6894 # doesn't make sense to delay the release
6895 self.early_release = True
6896 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
6897 " early-release mode", secondary_node)
6900 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6903 # If not specified all disks should be replaced
6904 if not self.disks:
6905 self.disks = range(len(self.instance.disks))
6907 for node in check_nodes:
6908 _CheckNodeOnline(self.lu, node)
6910 # Check whether disks are valid
6911 for disk_idx in self.disks:
6912 instance.FindDisk(disk_idx)
6914 # Get secondary node IP addresses
6915 node_2nd_ip = {}
6917 for node_name in [self.target_node, self.other_node, self.new_node]:
6918 if node_name is not None:
6919 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6921 self.node_secondary_ip = node_2nd_ip
6923 def Exec(self, feedback_fn):
6924 """Execute disk replacement.
6926 This dispatches the disk replacement to the appropriate handler.
6928 """
6929 if self.delay_iallocator:
6930 self._CheckPrereq2()
6933 feedback_fn("No disks need replacement")
6936 feedback_fn("Replacing disk(s) %s for %s" %
6937 (utils.CommaJoin(self.disks), self.instance.name))
6939 activate_disks = (not self.instance.admin_up)
6941 # Activate the instance disks if we're replacing them on a down instance
6942 if activate_disks:
6943 _StartInstanceDisks(self.lu, self.instance, True)
6945 try:
6946 # Should we replace the secondary node?
6947 if self.new_node is not None:
6948 fn = self._ExecDrbd8Secondary
6949 else:
6950 fn = self._ExecDrbd8DiskOnly
6952 return fn(feedback_fn)
6954 finally:
6955 # Deactivate the instance disks if we're replacing them on a
6956 # down instance
6957 if activate_disks:
6958 _SafeShutdownInstanceDisks(self.lu, self.instance)
6960 def _CheckVolumeGroup(self, nodes):
6961 self.lu.LogInfo("Checking volume groups")
6963 vgname = self.cfg.GetVGName()
6965 # Make sure volume group exists on all involved nodes
6966 results = self.rpc.call_vg_list(nodes)
6967 if not results:
6968 raise errors.OpExecError("Can't list volume groups on the nodes")
6970 for node in nodes:
6971 res = results[node]
6972 res.Raise("Error checking node %s" % node)
6973 if vgname not in res.payload:
6974 raise errors.OpExecError("Volume group '%s' not found on node %s" %
6975 (vgname, node))
6977 def _CheckDisksExistence(self, nodes):
6978 # Check disk existence
6979 for idx, dev in enumerate(self.instance.disks):
6980 if idx not in self.disks:
6981 continue
6983 for node in nodes:
6984 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6985 self.cfg.SetDiskID(dev, node)
6987 result = self.rpc.call_blockdev_find(node, dev)
6989 msg = result.fail_msg
6990 if msg or not result.payload:
6991 if not msg:
6992 msg = "disk not found"
6993 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6994 (idx, node, msg))
6996 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6997 for idx, dev in enumerate(self.instance.disks):
6998 if idx not in self.disks:
6999 continue
7001 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7002 (idx, node_name))
7004 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7005 ldisk=ldisk):
7006 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7007 " replace disks for instance %s" %
7008 (node_name, self.instance.name))
7010 def _CreateNewStorage(self, node_name):
7011 vgname = self.cfg.GetVGName()
7013 iv_names = {}
7014 for idx, dev in enumerate(self.instance.disks):
7015 if idx not in self.disks:
7016 continue
7018 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7020 self.cfg.SetDiskID(dev, node_name)
7022 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7023 names = _GenerateUniqueNames(self.lu, lv_names)
7025 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7026 logical_id=(vgname, names[0]))
7027 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7028 logical_id=(vgname, names[1]))
7030 new_lvs = [lv_data, lv_meta]
7031 old_lvs = dev.children
7032 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7034 # we pass force_create=True to force the LVM creation
7035 for new_lv in new_lvs:
7036 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7037 _GetInstanceInfoText(self.instance), False)
7039 return iv_names
7041 def _CheckDevices(self, node_name, iv_names):
7042 for name, (dev, _, _) in iv_names.iteritems():
7043 self.cfg.SetDiskID(dev, node_name)
7045 result = self.rpc.call_blockdev_find(node_name, dev)
7047 msg = result.fail_msg
7048 if msg or not result.payload:
7049 if not msg:
7050 msg = "disk not found"
7051 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7052 (name, msg))
7054 if result.payload.is_degraded:
7055 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7057 def _RemoveOldStorage(self, node_name, iv_names):
7058 for name, (_, old_lvs, _) in iv_names.iteritems():
7059 self.lu.LogInfo("Remove logical volumes for %s" % name)
7062 self.cfg.SetDiskID(lv, node_name)
7064 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7066 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7067 hint="remove unused LVs manually")
7069 def _ReleaseNodeLock(self, node_name):
7070 """Releases the lock for a given node."""
7071 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7073 def _ExecDrbd8DiskOnly(self, feedback_fn):
7074 """Replace a disk on the primary or secondary for DRBD 8.
7076 The algorithm for replace is quite complicated:
7078 1. for each disk to be replaced:
7080 1. create new LVs on the target node with unique names
7081 1. detach old LVs from the drbd device
7082 1. rename old LVs to name_replaced.<time_t>
7083 1. rename new LVs to old LVs
7084 1. attach the new LVs (with the old names now) to the drbd device
7086 1. wait for sync across all devices
7088 1. for each modified disk:
7090 1. remove old LVs (which have the name name_replaces.<time_t>)
7092 Failures are not very well handled.
7094 """
7095 steps_total = 6
7097 # Step: check device activation
7098 self.lu.LogStep(1, steps_total, "Check device existence")
7099 self._CheckDisksExistence([self.other_node, self.target_node])
7100 self._CheckVolumeGroup([self.target_node, self.other_node])
7102 # Step: check other node consistency
7103 self.lu.LogStep(2, steps_total, "Check peer consistency")
7104 self._CheckDisksConsistency(self.other_node,
7105 self.other_node == self.instance.primary_node,
7106 False)
7108 # Step: create new storage
7109 self.lu.LogStep(3, steps_total, "Allocate new storage")
7110 iv_names = self._CreateNewStorage(self.target_node)
7112 # Step: for each lv, detach+rename*2+attach
7113 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7114 for dev, old_lvs, new_lvs in iv_names.itervalues():
7115 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7117 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7118 old_lvs)
7119 result.Raise("Can't detach drbd from local storage on node"
7120 " %s for device %s" % (self.target_node, dev.iv_name))
7122 #cfg.Update(instance)
7124 # ok, we created the new LVs, so now we know we have the needed
7125 # storage; as such, we proceed on the target node to rename
7126 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7127 # using the assumption that logical_id == physical_id (which in
7128 # turn is the unique_id on that node)
7130 # FIXME(iustin): use a better name for the replaced LVs
7131 temp_suffix = int(time.time())
7132 ren_fn = lambda d, suff: (d.physical_id[0],
7133 d.physical_id[1] + "_replaced-%s" % suff)
7135 # Build the rename list based on what LVs exist on the node
7136 rename_old_to_new = []
7137 for to_ren in old_lvs:
7138 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7139 if not result.fail_msg and result.payload:
7140 # device exists
7141 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7143 self.lu.LogInfo("Renaming the old LVs on the target node")
7144 result = self.rpc.call_blockdev_rename(self.target_node,
7145 rename_old_to_new)
7146 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7148 # Now we rename the new LVs to the old LVs
7149 self.lu.LogInfo("Renaming the new LVs on the target node")
7150 rename_new_to_old = [(new, old.physical_id)
7151 for old, new in zip(old_lvs, new_lvs)]
7152 result = self.rpc.call_blockdev_rename(self.target_node,
7153 rename_new_to_old)
7154 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7156 for old, new in zip(old_lvs, new_lvs):
7157 new.logical_id = old.logical_id
7158 self.cfg.SetDiskID(new, self.target_node)
7160 for disk in old_lvs:
7161 disk.logical_id = ren_fn(disk, temp_suffix)
7162 self.cfg.SetDiskID(disk, self.target_node)
7164 # Now that the new lvs have the old name, we can add them to the device
7165 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7166 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7167 new_lvs)
7168 msg = result.fail_msg
7169 if msg:
7170 for new_lv in new_lvs:
7171 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7172 new_lv).fail_msg
7173 if msg2:
7174 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7175 hint=("cleanup manually the unused logical"
7176 " volumes"))
7177 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7179 dev.children = new_lvs
7181 self.cfg.Update(self.instance, feedback_fn)
7183 cstep = 5
7184 if self.early_release:
7185 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7186 cstep += 1
7187 self._RemoveOldStorage(self.target_node, iv_names)
7188 # WARNING: we release both node locks here, do not do other RPCs
7189 # than WaitForSync to the primary node
7190 self._ReleaseNodeLock([self.target_node, self.other_node])
7193 # This can fail as the old devices are degraded and _WaitForSync
7194 # does a combined result over all disks, so we don't check its return value
7195 self.lu.LogStep(cstep, steps_total, "Sync devices")
7196 cstep += 1
7197 _WaitForSync(self.lu, self.instance)
7199 # Check all devices manually
7200 self._CheckDevices(self.instance.primary_node, iv_names)
7202 # Step: remove old storage
7203 if not self.early_release:
7204 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7205 cstep += 1
7206 self._RemoveOldStorage(self.target_node, iv_names)
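# To make the rename dance concrete (hypothetical volume names): for disk/0
# the attached data LV, say "<uuid>.disk0_data", is first renamed to
# "<uuid>.disk0_data_replaced-<time_t>" (the suffix comes from ren_fn above),
# the newly allocated LV is renamed to the old name and re-attached, and the
# "_replaced-<time_t>" volumes are only deleted after the mirror has synced.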
7208 def _ExecDrbd8Secondary(self, feedback_fn):
7209 """Replace the secondary node for DRBD 8.
7211 The algorithm for replace is quite complicated:
7212 - for all disks of the instance:
7213 - create new LVs on the new node with same names
7214 - shutdown the drbd device on the old secondary
7215 - disconnect the drbd network on the primary
7216 - create the drbd device on the new secondary
7217 - network attach the drbd on the primary, using an artifice:
7218 the drbd code for Attach() will connect to the network if it
7219 finds a device which is connected to the good local disks but
7220 not network enabled
7221 - wait for sync across all devices
7222 - remove all disks from the old secondary
7224 Failures are not very well handled.
7226 """
7227 steps_total = 6
7229 # Step: check device activation
7230 self.lu.LogStep(1, steps_total, "Check device existence")
7231 self._CheckDisksExistence([self.instance.primary_node])
7232 self._CheckVolumeGroup([self.instance.primary_node])
7234 # Step: check other node consistency
7235 self.lu.LogStep(2, steps_total, "Check peer consistency")
7236 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7238 # Step: create new storage
7239 self.lu.LogStep(3, steps_total, "Allocate new storage")
7240 for idx, dev in enumerate(self.instance.disks):
7241 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7242 (self.new_node, idx))
7243 # we pass force_create=True to force LVM creation
7244 for new_lv in dev.children:
7245 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7246 _GetInstanceInfoText(self.instance), False)
7248 # Step 4: dbrd minors and drbd setups changes
7249 # after this, we must manually remove the drbd minors on both the
7250 # error and the success paths
7251 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7252 minors = self.cfg.AllocateDRBDMinor([self.new_node
7253 for dev in self.instance.disks],
7254 self.instance.name)
7255 logging.debug("Allocated minors %r", minors)
7257 iv_names = {}
7258 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7259 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7260 (self.new_node, idx))
7261 # create new devices on new_node; note that we create two IDs:
7262 # one without port, so the drbd will be activated without
7263 # networking information on the new node at this stage, and one
7264 # with network, for the latter activation in step 4
7265 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7266 if self.instance.primary_node == o_node1:
7267 p_minor = o_minor1
7268 else:
7269 assert self.instance.primary_node == o_node2, "Three-node instance?"
7270 p_minor = o_minor2
7272 new_alone_id = (self.instance.primary_node, self.new_node, None,
7273 p_minor, new_minor, o_secret)
7274 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7275 p_minor, new_minor, o_secret)
7277 iv_names[idx] = (dev, dev.children, new_net_id)
7278 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7280 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7281 logical_id=new_alone_id,
7282 children=dev.children,
7285 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7286 _GetInstanceInfoText(self.instance), False)
7287 except errors.GenericError:
7288 self.cfg.ReleaseDRBDMinors(self.instance.name)
7289 raise
7291 # We have new devices, shutdown the drbd on the old secondary
7292 for idx, dev in enumerate(self.instance.disks):
7293 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7294 self.cfg.SetDiskID(dev, self.target_node)
7295 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7296 if msg:
7297 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7298 " node: %s" % (idx, msg),
7299 hint=("Please cleanup this device manually as"
7300 " soon as possible"))
7302 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7303 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7304 self.node_secondary_ip,
7305 self.instance.disks)\
7306 [self.instance.primary_node]
7308 msg = result.fail_msg
7309 if msg:
7310 # detaches didn't succeed (unlikely)
7311 self.cfg.ReleaseDRBDMinors(self.instance.name)
7312 raise errors.OpExecError("Can't detach the disks from the network on"
7313 " old node: %s" % (msg,))
7315 # if we managed to detach at least one, we update all the disks of
7316 # the instance to point to the new secondary
7317 self.lu.LogInfo("Updating instance configuration")
7318 for dev, _, new_logical_id in iv_names.itervalues():
7319 dev.logical_id = new_logical_id
7320 self.cfg.SetDiskID(dev, self.instance.primary_node)
7322 self.cfg.Update(self.instance, feedback_fn)
7324 # and now perform the drbd attach
7325 self.lu.LogInfo("Attaching primary drbds to new secondary"
7326 " (standalone => connected)")
7327 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7328 self.new_node],
7329 self.node_secondary_ip,
7330 self.instance.disks,
7331 self.instance.name,
7332 False)
7333 for to_node, to_result in result.items():
7334 msg = to_result.fail_msg
7335 if msg:
7336 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7337 to_node, msg,
7338 hint=("please do a gnt-instance info to see the"
7339 " status of disks"))
7340 cstep = 5
7341 if self.early_release:
7342 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7343 cstep += 1
7344 self._RemoveOldStorage(self.target_node, iv_names)
7345 # WARNING: we release all node locks here, do not do other RPCs
7346 # than WaitForSync to the primary node
7347 self._ReleaseNodeLock([self.instance.primary_node,
7348 self.target_node,
7349 self.new_node])
7352 # This can fail as the old devices are degraded and _WaitForSync
7353 # does a combined result over all disks, so we don't check its return value
7354 self.lu.LogStep(cstep, steps_total, "Sync devices")
7355 cstep += 1
7356 _WaitForSync(self.lu, self.instance)
7358 # Check all devices manually
7359 self._CheckDevices(self.instance.primary_node, iv_names)
7361 # Step: remove old storage
7362 if not self.early_release:
7363 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7364 self._RemoveOldStorage(self.target_node, iv_names)
7367 class LURepairNodeStorage(NoHooksLU):
7368 """Repairs the volume group on a node.
7371 _OP_REQP = ["node_name"]
7374 def CheckArguments(self):
7375 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7377 def ExpandNames(self):
7378 self.needed_locks = {
7379 locking.LEVEL_NODE: [self.op.node_name],
7382 def _CheckFaultyDisks(self, instance, node_name):
7383 """Ensure faulty disks abort the opcode or at least warn."""
7385 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7387 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7388 " node '%s'" % (instance.name, node_name),
7390 except errors.OpPrereqError, err:
7391 if self.op.ignore_consistency:
7392 self.proc.LogWarning(str(err.args[0]))
7396 def CheckPrereq(self):
7397 """Check prerequisites.
7400 storage_type = self.op.storage_type
7402 if (constants.SO_FIX_CONSISTENCY not in
7403 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7404 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7405 " repaired" % storage_type,
7408 # Check whether any instance on this node has faulty disks
7409 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7410 if not inst.admin_up:
7411 continue
7412 check_nodes = set(inst.all_nodes)
7413 check_nodes.discard(self.op.node_name)
7414 for inst_node_name in check_nodes:
7415 self._CheckFaultyDisks(inst, inst_node_name)
7417 def Exec(self, feedback_fn):
7418 feedback_fn("Repairing storage unit '%s' on %s ..." %
7419 (self.op.name, self.op.node_name))
7421 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7422 result = self.rpc.call_storage_execute(self.op.node_name,
7423 self.op.storage_type, st_args,
7424 self.op.name,
7425 constants.SO_FIX_CONSISTENCY)
7426 result.Raise("Failed to repair storage unit '%s' on %s" %
7427 (self.op.name, self.op.node_name))
7430 class LUNodeEvacuationStrategy(NoHooksLU):
7431 """Computes the node evacuation strategy.
7434 _OP_REQP = ["nodes"]
7437 def CheckArguments(self):
7438 if not hasattr(self.op, "remote_node"):
7439 self.op.remote_node = None
7440 if not hasattr(self.op, "iallocator"):
7441 self.op.iallocator = None
7442 if self.op.remote_node is not None and self.op.iallocator is not None:
7443 raise errors.OpPrereqError("Give either the iallocator or the new"
7444 " secondary, not both", errors.ECODE_INVAL)
7446 def ExpandNames(self):
7447 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7448 self.needed_locks = locks = {}
7449 if self.op.remote_node is None:
7450 locks[locking.LEVEL_NODE] = locking.ALL_SET
7451 else:
7452 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7453 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7455 def CheckPrereq(self):
7456 pass
7458 def Exec(self, feedback_fn):
7459 if self.op.remote_node is not None:
7460 instances = []
7461 for node in self.op.nodes:
7462 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7463 result = []
7464 for i in instances:
7465 if i.primary_node == self.op.remote_node:
7466 raise errors.OpPrereqError("Node %s is the primary node of"
7467 " instance %s, cannot use it as"
7468 " secondary" %
7469 (self.op.remote_node, i.name),
7470 errors.ECODE_INVAL)
7471 result.append([i.name, self.op.remote_node])
7472 else:
7473 ial = IAllocator(self.cfg, self.rpc,
7474 mode=constants.IALLOCATOR_MODE_MEVAC,
7475 evac_nodes=self.op.nodes)
7476 ial.Run(self.op.iallocator, validate=True)
7477 if not ial.success:
7478 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7479 errors.ECODE_NORES)
7480 result = ial.result
7481 return result
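# The returned result is a list of [instance, new_secondary] pairs, e.g.
# (hypothetical names):
#   [["inst1.example.com", "node3.example.com"],
#    ["inst2.example.com", "node4.example.com"]]
# built directly when a remote node is given, or taken from the
# iallocator's multi-evacuate reply otherwise.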
7484 class LUGrowDisk(LogicalUnit):
7485 """Grow a disk of an instance.
7489 HTYPE = constants.HTYPE_INSTANCE
7490 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7493 def ExpandNames(self):
7494 self._ExpandAndLockInstance()
7495 self.needed_locks[locking.LEVEL_NODE] = []
7496 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7498 def DeclareLocks(self, level):
7499 if level == locking.LEVEL_NODE:
7500 self._LockInstancesNodes()
7502 def BuildHooksEnv(self):
7503 """Build hooks env.
7505 This runs on the master, the primary and all the secondaries.
7507 """
7508 env = {
7509 "DISK": self.op.disk,
7510 "AMOUNT": self.op.amount,
7511 }
7512 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7513 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7514 return env, nl, nl
7516 def CheckPrereq(self):
7517 """Check prerequisites.
7519 This checks that the instance is in the cluster.
7521 """
7522 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7523 assert instance is not None, \
7524 "Cannot retrieve locked instance %s" % self.op.instance_name
7525 nodenames = list(instance.all_nodes)
7526 for node in nodenames:
7527 _CheckNodeOnline(self, node)
7530 self.instance = instance
7532 if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7533 raise errors.OpPrereqError("Instance's disk layout does not support"
7534 " growing.", errors.ECODE_INVAL)
7536 self.disk = instance.FindDisk(self.op.disk)
7538 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7540 def Exec(self, feedback_fn):
7541 """Execute disk grow.
7544 instance = self.instance
7545 disk = self.disk
7546 for node in instance.all_nodes:
7547 self.cfg.SetDiskID(disk, node)
7548 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7549 result.Raise("Grow request failed to node %s" % node)
7551 # TODO: Rewrite code to work properly
7552 # DRBD goes into sync mode for a short amount of time after executing the
7553 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7554 # calling "resize" in sync mode fails. Sleeping for a short amount of
7555 # time is a work-around.
7556 time.sleep(5)
7558 disk.RecordGrow(self.op.amount)
7559 self.cfg.Update(instance, feedback_fn)
7560 if self.op.wait_for_sync:
7561 disk_abort = not _WaitForSync(self, instance)
7563 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7564 " status.\nPlease check the instance.")
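  # The grow happens in two phases: physically on every node holding the disk
  # (call_blockdev_grow), then logically in the configuration (RecordGrow plus
  # cfg.Update). With wait_for_sync unset, callers must verify the sync status
  # themselves later.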
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False
  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
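  # For non-static queries the per-device status is a 7-tuple: (dev_path,
  # major, minor, sync_percent, estimated_time, is_degraded, ldisk_status);
  # None is returned for static queries, missing nodes and offline results.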
  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data
  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result
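  # The result maps each instance name to a dict combining the configured
  # state with, for non-static queries, the runtime state as reported by the
  # instance's primary node.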
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False
  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    if not hasattr(self.op, "disk_template"):
      self.op.disk_template = None
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
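  # Illustrative shapes of the two modification lists validated above (values
  # hypothetical; the opcode itself is constructed elsewhere):
  #   op.disks = [(constants.DDM_ADD, {'size': 1024, 'mode': 'w'})]
  #   op.nics = [(0, {'ip': 'none'}), (constants.DDM_REMOVE, {})]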
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)
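  # Illustrative call (parameter names and values hypothetical): resetting
  # "acpi" to the cluster default while overriding "boot_order" returns the
  # sparse instance-level dict plus the fully filled dict used for checks:
  #   new, filled = self._GetUpdatedParams({'acpi': False},
  #                                        {'acpi': constants.VALUE_DEFAULT,
  #                                         'boot_order': 'cd'},
  #                                        cluster.hvparams[hv_name],
  #                                        constants.HVS_PARAMETER_TYPES)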
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        _CheckInstanceDown(self, instance, "cannot change disk template")
    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
        instance.hvparams, self.op.hvparams,
        cluster.hvparams[instance.hypervisor],
        constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
        instance.beparams, self.op.beparams,
        cluster.beparams[constants.PP_DEFAULT],
        constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []
    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %s" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
        self._GetUpdatedParams(old_nic_params, update_params_dict,
                               cluster.nicparams[constants.PP_DEFAULT],
                               constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %s" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance

    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))
    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
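  # The conversion helpers are stored as plain functions in this class-level
  # dict, which is why the dispatch in Exec passes the LU instance explicitly:
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)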
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False
  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False
  def CheckArguments(self):
    """Check the arguments.

    """
    _CheckBooleanOpField(self.op, "remove_instance")
    _CheckBooleanOpField(self.op, "ignore_remove_failures")

    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
    self.remove_instance = getattr(self.op, "remove_instance", False)
    self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
                                          False)

    if self.remove_instance and not self.op.shutdown:
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before")
  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.
  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.remove_instance)),
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      try:
        for idx, disk in enumerate(instance.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, src_node))

          # result.payload will be a snapshot of an lvm leaf of the one we
          # passed
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, src_node, msg)
            snap_disks.append(False)
          else:
            disk_id = (vgname, result.payload)
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                   logical_id=disk_id, physical_id=disk_id,
                                   iv_name=disk.iv_name)
            snap_disks.append(new_dev)

      finally:
        if self.op.shutdown and instance.admin_up and not self.remove_instance:
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, src_node, dst_node.name))
        if dev:
          # FIXME: pass debug from opcode to backend
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name,
                                                 idx, self.op.debug_level)
          msg = result.fail_msg
          if msg:
            self.LogWarning("Could not export disk/%s from node %s to"
                            " node %s: %s", idx, src_node, dst_node.name, msg)
            dresults.append(False)
          else:
            dresults.append(True)
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
          if msg:
            self.LogWarning("Could not remove snapshot for disk/%d from node"
                            " %s: %s", idx, src_node, msg)
        else:
          dresults.append(False)

      feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance,
                                             snap_disks)
      fin_resu = True
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", instance.name, dst_node.name, msg)
        fin_resu = False

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Remove instance if requested
    if self.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance, self.ignore_remove_failures)

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

    return fin_resu, dresults
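  # The LU returns (fin_resu, dresults): whether finalizing the export on the
  # target node succeeded, plus one boolean per instance disk telling if that
  # disk's snapshot was exported successfully.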
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False
  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
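  # Illustrative instantiation (the mode decides which keyword set is
  # mandatory, per _ALLO_KEYS/_RELO_KEYS/_EVAC_KEYS above; names hypothetical):
  #   IAllocator(self.cfg, self.rpc, mode=constants.IALLOCATOR_MODE_RELOC,
  #              name=instance_name, relocate_from=[old_secondary])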
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
          cluster_info.nicparams[constants.PP_DEFAULT],
          nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
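  # The serialized input then looks roughly like this (multi-evacuate case;
  # the "type" value is the corresponding IALLOCATOR_MODE_* constant):
  #   {"cluster_name": ..., "nodes": {...}, "instances": {...},
  #    "request": {"type": ..., "evac_nodes": [...]}}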
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " not a list")
    self.out_data = rdict
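  # Expected allocator script output, as enforced above (illustrative):
  #   {"success": true, "info": "...", "result": [...]}
  # with a top-level "nodes" key still accepted as a legacy alias of "result".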
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result