4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
40 from ganeti import ssh
41 from ganeti import utils
42 from ganeti import errors
43 from ganeti import hypervisor
44 from ganeti import locking
45 from ganeti import constants
46 from ganeti import objects
47 from ganeti import serializer
48 from ganeti import ssconf
49 from ganeti import uidpool
50 from ganeti import compat
51 from ganeti import masterd
53 import ganeti.masterd.instance # pylint: disable-msg=W0611
56 # need to define these here before the actual LUs
59 """Returns an empty list.
66 """Returns an empty dict.
72 class LogicalUnit(object):
73 """Logical Unit base class.
75 Subclasses must follow these rules:
76 - implement ExpandNames
77 - implement CheckPrereq (except when tasklets are used)
78 - implement Exec (except when tasklets are used)
79 - implement BuildHooksEnv
80 - redefine HPATH and HTYPE
81 - optionally redefine their run requirements:
82 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
84 Note that all commands require root permissions.
86 @ivar dry_run_result: the value (if any) that will be returned to the caller
87 in dry-run mode (signalled by opcode dry_run parameter)
88 @cvar _OP_DEFS: a list of opcode attributes and the default values
89 they should get if not already set
98 def __init__(self, processor, op, context, rpc):
99 """Constructor for LogicalUnit.
101 This needs to be overridden in derived classes in order to check op
105 self.proc = processor
107 self.cfg = context.cfg
108 self.context = context
110 # Dicts used to declare locking needs to mcpu
111 self.needed_locks = None
112 self.acquired_locks = {}
113 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
115 self.remove_locks = {}
116 # Used to force good behavior when calling helper functions
117 self.recalculate_locks = {}
120 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
121 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
122 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
123 # support for dry-run
124 self.dry_run_result = None
125 # support for generic debug attribute
126 if (not hasattr(self.op, "debug_level") or
127 not isinstance(self.op.debug_level, int)):
128 self.op.debug_level = 0
133 for aname, aval in self._OP_DEFS:
134 if not hasattr(self.op, aname):
139 setattr(self.op, aname, dval)
141 for attr_name in self._OP_REQP:
142 attr_val = getattr(op, attr_name, None)
144 raise errors.OpPrereqError("Required parameter '%s' missing" %
145 attr_name, errors.ECODE_INVAL)
147 self.CheckArguments()
150 """Returns the SshRunner object
154 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
157 ssh = property(fget=__GetSSH)
159 def CheckArguments(self):
160 """Check syntactic validity for the opcode arguments.
162 This method is for doing a simple syntactic check and ensuring
163 validity of opcode parameters, without any cluster-related
164 checks. While the same can be accomplished in ExpandNames and/or
165 CheckPrereq, doing these separately is better because:
167 - ExpandNames is left as purely a lock-related function
168 - CheckPrereq is run after we have acquired locks (and possible
171 The function is allowed to change the self.op attribute so that
172 later methods no longer need to worry about missing parameters.
177 def ExpandNames(self):
178 """Expand names for this LU.
180 This method is called before starting to execute the opcode, and it should
181 update all the parameters of the opcode to their canonical form (e.g. a
182 short node name must be fully expanded after this method has successfully
183 completed). This way locking, hooks, logging, etc. can work correctly.
185 LUs which implement this method must also populate the self.needed_locks
186 member, as a dict with lock levels as keys, and a list of needed lock names
189 - use an empty dict if you don't need any lock
190 - if you don't need any lock at a particular level omit that level
191 - don't put anything for the BGL level
192 - if you want all locks at a level use locking.ALL_SET as a value
194 If you need to share locks (rather than acquire them exclusively) at one
195 level you can modify self.share_locks, setting a true value (usually 1) for
196 that level. By default locks are not shared.
198 This function can also define a list of tasklets, which then will be
199 executed in order instead of the usual LU-level CheckPrereq and Exec
200 functions, if those are not defined by the LU.
204 # Acquire all nodes and one instance
205 self.needed_locks = {
206 locking.LEVEL_NODE: locking.ALL_SET,
207 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
209 # Acquire just two nodes
210 self.needed_locks = {
211 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
214 self.needed_locks = {} # No, you can't leave it to the default value None
217 # The implementation of this method is mandatory only if the new LU is
218 # concurrent, so that old LUs don't need to be changed all at the same
221 self.needed_locks = {} # Exclusive LUs don't need locks.
223 raise NotImplementedError
225 def DeclareLocks(self, level):
226 """Declare LU locking needs for a level
228 While most LUs can just declare their locking needs at ExpandNames time,
229 sometimes there's the need to calculate some locks after having acquired
230 the ones before. This function is called just before acquiring locks at a
231 particular level, but after acquiring the ones at lower levels, and permits
232 such calculations. It can be used to modify self.needed_locks, and by
233 default it does nothing.
235 This function is only called if you have something already set in
236 self.needed_locks for the level.
238 @param level: Locking level which is going to be locked
239 @type level: member of ganeti.locking.LEVELS
243 def CheckPrereq(self):
244 """Check prerequisites for this LU.
246 This method should check that the prerequisites for the execution
247 of this LU are fulfilled. It can do internode communication, but
248 it should be idempotent - no cluster or system changes are
251 The method should raise errors.OpPrereqError in case something is
252 not fulfilled. Its return value is ignored.
254 This method should also update all the parameters of the opcode to
255 their canonical form if it hasn't been done by ExpandNames before.
258 if self.tasklets is not None:
259 for (idx, tl) in enumerate(self.tasklets):
260 logging.debug("Checking prerequisites for tasklet %s/%s",
261 idx + 1, len(self.tasklets))
264 raise NotImplementedError
266 def Exec(self, feedback_fn):
269 This method should implement the actual work. It should raise
270 errors.OpExecError for failures that are somewhat dealt with in
274 if self.tasklets is not None:
275 for (idx, tl) in enumerate(self.tasklets):
276 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
279 raise NotImplementedError
281 def BuildHooksEnv(self):
282 """Build hooks environment for this LU.
284 This method should return a three-element tuple consisting of: a dict
285 containing the environment that will be used for running the
286 specific hook for this LU, a list of node names on which the hook
287 should run before the execution, and a list of node names on which
288 the hook should run after the execution.
290 The keys of the dict must not be prefixed with 'GANETI_', as this will
291 be handled by the hooks runner. Also note additional keys will be
292 added by the hooks runner. If the LU doesn't define any
293 environment, an empty dict (and not None) should be returned.
295 No nodes should be returned as an empty list (and not None).
297 Note that if the HPATH for a LU class is None, this function will
301 raise NotImplementedError
303 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
304 """Notify the LU about the results of its hooks.
306 This method is called every time a hooks phase is executed, and notifies
307 the Logical Unit about the hooks' result. The LU can then use it to alter
308 its result based on the hooks. By default the method does nothing and the
309 previous result is passed back unchanged, but any LU can override it if it
310 wants to use the local cluster hook-scripts somehow.
312 @param phase: one of L{constants.HOOKS_PHASE_POST} or
313 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
314 @param hook_results: the results of the multi-node hooks rpc call
315 @param feedback_fn: function used to send feedback back to the caller
316 @param lu_result: the previous Exec result this LU had, or None
318 @return: the new Exec result, based on the previous result
322 # API must be kept, thus we ignore the 'unused argument' and 'could
323 # be a function' pylint warnings
324 # pylint: disable-msg=W0613,R0201
327 def _ExpandAndLockInstance(self):
328 """Helper function to expand and lock an instance.
330 Many LUs that work on an instance take its name in self.op.instance_name
331 and need to expand it and then declare the expanded name for locking. This
332 function does it, and then updates self.op.instance_name to the expanded
333 name. It also initializes needed_locks as a dict, if this hasn't been done
337 if self.needed_locks is None:
338 self.needed_locks = {}
340 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
341 "_ExpandAndLockInstance called with instance-level locks set"
342 self.op.instance_name = _ExpandInstanceName(self.cfg,
343 self.op.instance_name)
344 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
346 def _LockInstancesNodes(self, primary_only=False):
347 """Helper function to declare instances' nodes for locking.
349 This function should be called after locking one or more instances to lock
350 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
351 with all primary or secondary nodes for instances already locked and
352 present in self.needed_locks[locking.LEVEL_INSTANCE].
354 It should be called from DeclareLocks, and for safety only works if
355 self.recalculate_locks[locking.LEVEL_NODE] is set.
357 In the future it may grow parameters to lock only some instances' nodes, or
358 only primary or secondary nodes, if needed.
360 It should be called in DeclareLocks in a way similar to::
362 if level == locking.LEVEL_NODE:
363 self._LockInstancesNodes()
365 @type primary_only: boolean
366 @param primary_only: only lock primary nodes of locked instances
369 assert locking.LEVEL_NODE in self.recalculate_locks, \
370 "_LockInstancesNodes helper function called with no nodes to recalculate"
372 # TODO: check if we've really been called with the instance locks held
374 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
375 # future we might want to have different behaviors depending on the value
376 # of self.recalculate_locks[locking.LEVEL_NODE]
378 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
379 instance = self.context.cfg.GetInstanceInfo(instance_name)
380 wanted_nodes.append(instance.primary_node)
382 wanted_nodes.extend(instance.secondary_nodes)
384 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
385 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
386 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
387 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389 del self.recalculate_locks[locking.LEVEL_NODE]
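# Illustrative sketch, not part of the original module: how an instance-level
# LU would typically combine the two helpers above (the LU name is made up).
#
#   class LUExampleInstanceOp(LogicalUnit):
#     def ExpandNames(self):
#       # expands self.op.instance_name and locks it at the instance level
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()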
392 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
393 """Simple LU which runs no hooks.
395 This LU is intended as a parent for other LogicalUnits which will
396 run no hooks, in order to reduce duplicate code.
402 def BuildHooksEnv(self):
403 """Empty BuildHooksEnv for NoHooksLu.
405 This just raises an error.
408 assert False, "BuildHooksEnv called for NoHooksLUs"
412 """Tasklet base class.
414 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
415 they can mix legacy code with tasklets. Locking needs to be done in the LU;
416 tasklets know nothing about locks.
418 Subclasses must follow these rules:
419 - Implement CheckPrereq
423 def __init__(self, lu):
430 def CheckPrereq(self):
431 """Check prerequisites for this tasklets.
433 This method should check whether the prerequisites for the execution of
434 this tasklet are fulfilled. It can do internode communication, but it
435 should be idempotent - no cluster or system changes are allowed.
437 The method should raise errors.OpPrereqError in case something is not
438 fulfilled. Its return value is ignored.
440 This method should also update all parameters to their canonical form if it
441 hasn't been done before.
444 raise NotImplementedError
446 def Exec(self, feedback_fn):
447 """Execute the tasklet.
449 This method should implement the actual work. It should raise
450 errors.OpExecError for failures that are somewhat dealt with in code, or
454 raise NotImplementedError
457 def _GetWantedNodes(lu, nodes):
458 """Returns list of checked and expanded node names.
460 @type lu: L{LogicalUnit}
461 @param lu: the logical unit on whose behalf we execute
463 @param nodes: list of node names or None for all nodes
465 @return: the list of nodes, sorted
466 @raise errors.ProgrammerError: if the nodes parameter is wrong type
469 if not isinstance(nodes, list):
470 raise errors.OpPrereqError("Invalid argument type 'nodes'",
474 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
475 " non-empty list of nodes whose name is to be expanded.")
477 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
478 return utils.NiceSort(wanted)
481 def _GetWantedInstances(lu, instances):
482 """Returns list of checked and expanded instance names.
484 @type lu: L{LogicalUnit}
485 @param lu: the logical unit on whose behalf we execute
486 @type instances: list
487 @param instances: list of instance names or None for all instances
489 @return: the list of instances, sorted
490 @raise errors.OpPrereqError: if the instances parameter is wrong type
491 @raise errors.OpPrereqError: if any of the passed instances is not found
494 if not isinstance(instances, list):
495 raise errors.OpPrereqError("Invalid argument type 'instances'",
499 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
501 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
505 def _GetUpdatedParams(old_params, update_dict,
506 use_default=True, use_none=False):
507 """Return the new version of a parameter dictionary.
509 @type old_params: dict
510 @param old_params: old parameters
511 @type update_dict: dict
512 @param update_dict: dict containing new parameter values, or
513 constants.VALUE_DEFAULT to reset the parameter to its default
515 @type use_default: boolean
516 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
517 values as 'to be deleted' values
518 @type use_none: boolean
519 @param use_none: whether to recognise C{None} values as 'to be
522 @return: the new parameter dictionary
525 params_copy = copy.deepcopy(old_params)
526 for key, val in update_dict.iteritems():
527 if ((use_default and val == constants.VALUE_DEFAULT) or
528 (use_none and val is None)):
534 params_copy[key] = val
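# Worked example, illustrative only (assumes the elided branch above deletes
# keys whose value matches VALUE_DEFAULT/None, as the docstring describes):
#
#   old = {"memory": 128, "vcpus": 2}
#   upd = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
#   _GetUpdatedParams(old, upd)  ==>  {"vcpus": 4}
#
# i.e. "memory" falls back to its default by being removed from the
# per-object dict, while "vcpus" is simply overridden.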
538 def _CheckOutputFields(static, dynamic, selected):
539 """Checks whether all selected fields are valid.
541 @type static: L{utils.FieldSet}
542 @param static: static fields set
543 @type dynamic: L{utils.FieldSet}
544 @param dynamic: dynamic fields set
551 delta = f.NonMatching(selected)
553 raise errors.OpPrereqError("Unknown output fields selected: %s"
554 % ",".join(delta), errors.ECODE_INVAL)
557 def _CheckBooleanOpField(op, name):
558 """Validates boolean opcode parameters.
560 This will ensure that an opcode parameter is either a boolean value,
561 or None (but that it always exists).
564 val = getattr(op, name, None)
565 if not (val is None or isinstance(val, bool)):
566 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
567 (name, str(val)), errors.ECODE_INVAL)
568 setattr(op, name, val)
571 def _CheckGlobalHvParams(params):
572 """Validates that given hypervisor params are not global ones.
574 This will ensure that instances don't get customised versions of
578 used_globals = constants.HVC_GLOBALS.intersection(params)
580 msg = ("The following hypervisor parameters are global and cannot"
581 " be customized at instance level, please modify them at"
582 " cluster level: %s" % utils.CommaJoin(used_globals))
583 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
586 def _CheckNodeOnline(lu, node):
587 """Ensure that a given node is online.
589 @param lu: the LU on behalf of which we make the check
590 @param node: the node to check
591 @raise errors.OpPrereqError: if the node is offline
594 if lu.cfg.GetNodeInfo(node).offline:
595 raise errors.OpPrereqError("Can't use offline node %s" % node,
599 def _CheckNodeNotDrained(lu, node):
600 """Ensure that a given node is not drained.
602 @param lu: the LU on behalf of which we make the check
603 @param node: the node to check
604 @raise errors.OpPrereqError: if the node is drained
607 if lu.cfg.GetNodeInfo(node).drained:
608 raise errors.OpPrereqError("Can't use drained node %s" % node,
612 def _CheckNodeHasOS(lu, node, os_name, force_variant):
613 """Ensure that a node supports a given OS.
615 @param lu: the LU on behalf of which we make the check
616 @param node: the node to check
617 @param os_name: the OS to query about
618 @param force_variant: whether to ignore variant errors
619 @raise errors.OpPrereqError: if the node does not support the OS
622 result = lu.rpc.call_os_get(node, os_name)
623 result.Raise("OS '%s' not in supported OS list for node %s" %
625 prereq=True, ecode=errors.ECODE_INVAL)
626 if not force_variant:
627 _CheckOSVariant(result.payload, os_name)
630 def _RequireFileStorage():
631 """Checks that file storage is enabled.
633 @raise errors.OpPrereqError: when file storage is disabled
636 if not constants.ENABLE_FILE_STORAGE:
637 raise errors.OpPrereqError("File storage disabled at configure time",
641 def _CheckDiskTemplate(template):
642 """Ensure a given disk template is valid.
645 if template not in constants.DISK_TEMPLATES:
646 msg = ("Invalid disk template name '%s', valid templates are: %s" %
647 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
648 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
649 if template == constants.DT_FILE:
650 _RequireFileStorage()
653 def _CheckStorageType(storage_type):
654 """Ensure a given storage type is valid.
657 if storage_type not in constants.VALID_STORAGE_TYPES:
658 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
660 if storage_type == constants.ST_FILE:
661 _RequireFileStorage()
664 def _GetClusterDomainSecret():
665 """Reads the cluster domain secret.
668 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
672 def _CheckInstanceDown(lu, instance, reason):
673 """Ensure that an instance is not running."""
674 if instance.admin_up:
675 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
676 (instance.name, reason), errors.ECODE_STATE)
678 pnode = instance.primary_node
679 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
680 ins_l.Raise("Can't contact node %s for instance information" % pnode,
681 prereq=True, ecode=errors.ECODE_ENVIRON)
683 if instance.name in ins_l.payload:
684 raise errors.OpPrereqError("Instance %s is running, %s" %
685 (instance.name, reason), errors.ECODE_STATE)
688 def _ExpandItemName(fn, name, kind):
689 """Expand an item name.
691 @param fn: the function to use for expansion
692 @param name: requested item name
693 @param kind: text description ('Node' or 'Instance')
694 @return: the resolved (full) name
695 @raise errors.OpPrereqError: if the item is not found
699 if full_name is None:
700 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
705 def _ExpandNodeName(cfg, name):
706 """Wrapper over L{_ExpandItemName} for nodes."""
707 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
710 def _ExpandInstanceName(cfg, name):
711 """Wrapper over L{_ExpandItemName} for instance."""
712 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
715 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
716 memory, vcpus, nics, disk_template, disks,
717 bep, hvp, hypervisor_name):
718 """Builds instance related env variables for hooks
720 This builds the hook environment from individual variables.
723 @param name: the name of the instance
724 @type primary_node: string
725 @param primary_node: the name of the instance's primary node
726 @type secondary_nodes: list
727 @param secondary_nodes: list of secondary nodes as strings
728 @type os_type: string
729 @param os_type: the name of the instance's OS
730 @type status: boolean
731 @param status: the should_run status of the instance
733 @param memory: the memory size of the instance
735 @param vcpus: the count of VCPUs the instance has
737 @param nics: list of tuples (ip, mac, mode, link) representing
738 the NICs the instance has
739 @type disk_template: string
740 @param disk_template: the disk template of the instance
742 @param disks: the list of (size, mode) pairs
744 @param bep: the backend parameters for the instance
746 @param hvp: the hypervisor parameters for the instance
747 @type hypervisor_name: string
748 @param hypervisor_name: the hypervisor for the instance
750 @return: the hook environment for this instance
759 "INSTANCE_NAME": name,
760 "INSTANCE_PRIMARY": primary_node,
761 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
762 "INSTANCE_OS_TYPE": os_type,
763 "INSTANCE_STATUS": str_status,
764 "INSTANCE_MEMORY": memory,
765 "INSTANCE_VCPUS": vcpus,
766 "INSTANCE_DISK_TEMPLATE": disk_template,
767 "INSTANCE_HYPERVISOR": hypervisor_name,
771 nic_count = len(nics)
772 for idx, (ip, mac, mode, link) in enumerate(nics):
775 env["INSTANCE_NIC%d_IP" % idx] = ip
776 env["INSTANCE_NIC%d_MAC" % idx] = mac
777 env["INSTANCE_NIC%d_MODE" % idx] = mode
778 env["INSTANCE_NIC%d_LINK" % idx] = link
779 if mode == constants.NIC_MODE_BRIDGED:
780 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
784 env["INSTANCE_NIC_COUNT"] = nic_count
787 disk_count = len(disks)
788 for idx, (size, mode) in enumerate(disks):
789 env["INSTANCE_DISK%d_SIZE" % idx] = size
790 env["INSTANCE_DISK%d_MODE" % idx] = mode
794 env["INSTANCE_DISK_COUNT"] = disk_count
796 for source, kind in [(bep, "BE"), (hvp, "HV")]:
797 for key, value in source.items():
798 env["INSTANCE_%s_%s" % (kind, key)] = value
803 def _NICListToTuple(lu, nics):
804 """Build a list of nic information tuples.
806 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
807 value in LUQueryInstanceData.
809 @type lu: L{LogicalUnit}
810 @param lu: the logical unit on whose behalf we execute
811 @type nics: list of L{objects.NIC}
812 @param nics: list of nics to convert to hooks tuples
816 cluster = lu.cfg.GetClusterInfo()
820 filled_params = cluster.SimpleFillNIC(nic.nicparams)
821 mode = filled_params[constants.NIC_MODE]
822 link = filled_params[constants.NIC_LINK]
823 hooks_nics.append((ip, mac, mode, link))
827 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
828 """Builds instance related env variables for hooks from an object.
830 @type lu: L{LogicalUnit}
831 @param lu: the logical unit on whose behalf we execute
832 @type instance: L{objects.Instance}
833 @param instance: the instance for which we should build the
836 @param override: dictionary with key/values that will override
839 @return: the hook environment dictionary
842 cluster = lu.cfg.GetClusterInfo()
843 bep = cluster.FillBE(instance)
844 hvp = cluster.FillHV(instance)
846 'name': instance.name,
847 'primary_node': instance.primary_node,
848 'secondary_nodes': instance.secondary_nodes,
849 'os_type': instance.os,
850 'status': instance.admin_up,
851 'memory': bep[constants.BE_MEMORY],
852 'vcpus': bep[constants.BE_VCPUS],
853 'nics': _NICListToTuple(lu, instance.nics),
854 'disk_template': instance.disk_template,
855 'disks': [(disk.size, disk.mode) for disk in instance.disks],
858 'hypervisor_name': instance.hypervisor,
861 args.update(override)
862 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
865 def _AdjustCandidatePool(lu, exceptions):
866 """Adjust the candidate pool after node operations.
869 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
871 lu.LogInfo("Promoted nodes to master candidate role: %s",
872 utils.CommaJoin(node.name for node in mod_list))
873 for name in mod_list:
874 lu.context.ReaddNode(name)
875 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
877 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
881 def _DecideSelfPromotion(lu, exceptions=None):
882 """Decide whether I should promote myself as a master candidate.
885 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
886 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
887 # the new node will increase mc_max by one, so:
888 mc_should = min(mc_should + 1, cp_size)
889 return mc_now < mc_should
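# Worked example, illustrative only: with candidate_pool_size = 10, mc_now = 3
# and mc_should = 4, adding this node gives mc_should = min(4 + 1, 10) = 5,
# and since 3 < 5 the new node promotes itself to master candidate.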
892 def _CheckNicsBridgesExist(lu, target_nics, target_node):
893 """Check that the brigdes needed by a list of nics exist.
896 cluster = lu.cfg.GetClusterInfo()
897 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
898 brlist = [params[constants.NIC_LINK] for params in paramslist
899 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
901 result = lu.rpc.call_bridges_exist(target_node, brlist)
902 result.Raise("Error checking bridges on destination node '%s'" %
903 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
906 def _CheckInstanceBridgesExist(lu, instance, node=None):
907 """Check that the brigdes needed by an instance exist.
911 node = instance.primary_node
912 _CheckNicsBridgesExist(lu, instance.nics, node)
915 def _CheckOSVariant(os_obj, name):
916 """Check whether an OS name conforms to the os variants specification.
918 @type os_obj: L{objects.OS}
919 @param os_obj: OS object to check
921 @param name: OS name passed by the user, to check for validity
924 if not os_obj.supported_variants:
927 variant = name.split("+", 1)[1]
929 raise errors.OpPrereqError("OS name must include a variant",
932 if variant not in os_obj.supported_variants:
933 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
936 def _GetNodeInstancesInner(cfg, fn):
937 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
940 def _GetNodeInstances(cfg, node_name):
941 """Returns a list of all primary and secondary instances on a node.
945 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
948 def _GetNodePrimaryInstances(cfg, node_name):
949 """Returns primary instances on a node.
952 return _GetNodeInstancesInner(cfg,
953 lambda inst: node_name == inst.primary_node)
956 def _GetNodeSecondaryInstances(cfg, node_name):
957 """Returns secondary instances on a node.
960 return _GetNodeInstancesInner(cfg,
961 lambda inst: node_name in inst.secondary_nodes)
964 def _GetStorageTypeArgs(cfg, storage_type):
965 """Returns the arguments for a storage type.
968 # Special case for file storage
969 if storage_type == constants.ST_FILE:
970 # storage.FileStorage wants a list of storage directories
971 return [[cfg.GetFileStorageDir()]]
976 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
979 for dev in instance.disks:
980 cfg.SetDiskID(dev, node_name)
982 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
983 result.Raise("Failed to get disk status from node %s" % node_name,
984 prereq=prereq, ecode=errors.ECODE_ENVIRON)
986 for idx, bdev_status in enumerate(result.payload):
987 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
993 class LUPostInitCluster(LogicalUnit):
994 """Logical unit for running hooks after cluster initialization.
997 HPATH = "cluster-init"
998 HTYPE = constants.HTYPE_CLUSTER
1001 def BuildHooksEnv(self):
1005 env = {"OP_TARGET": self.cfg.GetClusterName()}
1006 mn = self.cfg.GetMasterNode()
1007 return env, [], [mn]
1009 def CheckPrereq(self):
1010 """No prerequisites to check.
1015 def Exec(self, feedback_fn):
1022 class LUDestroyCluster(LogicalUnit):
1023 """Logical unit for destroying the cluster.
1026 HPATH = "cluster-destroy"
1027 HTYPE = constants.HTYPE_CLUSTER
1030 def BuildHooksEnv(self):
1034 env = {"OP_TARGET": self.cfg.GetClusterName()}
1037 def CheckPrereq(self):
1038 """Check prerequisites.
1040 This checks whether the cluster is empty.
1042 Any errors are signaled by raising errors.OpPrereqError.
1045 master = self.cfg.GetMasterNode()
1047 nodelist = self.cfg.GetNodeList()
1048 if len(nodelist) != 1 or nodelist[0] != master:
1049 raise errors.OpPrereqError("There are still %d node(s) in"
1050 " this cluster." % (len(nodelist) - 1),
1052 instancelist = self.cfg.GetInstanceList()
1054 raise errors.OpPrereqError("There are still %d instance(s) in"
1055 " this cluster." % len(instancelist),
1058 def Exec(self, feedback_fn):
1059 """Destroys the cluster.
1062 master = self.cfg.GetMasterNode()
1063 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1065 # Run post hooks on master node before it's removed
1066 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1068 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1070 # pylint: disable-msg=W0702
1071 self.LogWarning("Errors occurred running hooks on %s" % master)
1073 result = self.rpc.call_node_stop_master(master, False)
1074 result.Raise("Could not disable the master role")
1076 if modify_ssh_setup:
1077 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1078 utils.CreateBackup(priv_key)
1079 utils.CreateBackup(pub_key)
1084 def _VerifyCertificate(filename):
1085 """Verifies a certificate for LUVerifyCluster.
1087 @type filename: string
1088 @param filename: Path to PEM file
1092 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1093 utils.ReadFile(filename))
1094 except Exception, err: # pylint: disable-msg=W0703
1095 return (LUVerifyCluster.ETYPE_ERROR,
1096 "Failed to load X509 certificate %s: %s" % (filename, err))
1099 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1100 constants.SSL_CERT_EXPIRATION_ERROR)
1103 fnamemsg = "While verifying %s: %s" % (filename, msg)
1108 return (None, fnamemsg)
1109 elif errcode == utils.CERT_WARNING:
1110 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1111 elif errcode == utils.CERT_ERROR:
1112 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1114 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1117 class LUVerifyCluster(LogicalUnit):
1118 """Verifies the cluster status.
1121 HPATH = "cluster-verify"
1122 HTYPE = constants.HTYPE_CLUSTER
1123 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1126 TCLUSTER = "cluster"
1128 TINSTANCE = "instance"
1130 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1131 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1132 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1133 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1134 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1135 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1137 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1138 ENODEDRBD = (TNODE, "ENODEDRBD")
1139 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1140 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1141 ENODEHV = (TNODE, "ENODEHV")
1142 ENODELVM = (TNODE, "ENODELVM")
1143 ENODEN1 = (TNODE, "ENODEN1")
1144 ENODENET = (TNODE, "ENODENET")
1145 ENODEOS = (TNODE, "ENODEOS")
1146 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1147 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1148 ENODERPC = (TNODE, "ENODERPC")
1149 ENODESSH = (TNODE, "ENODESSH")
1150 ENODEVERSION = (TNODE, "ENODEVERSION")
1151 ENODESETUP = (TNODE, "ENODESETUP")
1152 ENODETIME = (TNODE, "ENODETIME")
1154 ETYPE_FIELD = "code"
1155 ETYPE_ERROR = "ERROR"
1156 ETYPE_WARNING = "WARNING"
1158 class NodeImage(object):
1159 """A class representing the logical and physical status of a node.
1162 @ivar name: the node name to which this object refers
1163 @ivar volumes: a structure as returned from
1164 L{ganeti.backend.GetVolumeList} (runtime)
1165 @ivar instances: a list of running instances (runtime)
1166 @ivar pinst: list of configured primary instances (config)
1167 @ivar sinst: list of configured secondary instances (config)
1168 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1169 of this node (config)
1170 @ivar mfree: free memory, as reported by hypervisor (runtime)
1171 @ivar dfree: free disk, as reported by the node (runtime)
1172 @ivar offline: the offline status (config)
1173 @type rpc_fail: boolean
1174 @ivar rpc_fail: whether the RPC verify call failed (overall,
1175 not whether the individual keys were correct) (runtime)
1176 @type lvm_fail: boolean
1177 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1178 @type hyp_fail: boolean
1179 @ivar hyp_fail: whether the RPC call didn't return the instance list
1180 @type ghost: boolean
1181 @ivar ghost: whether this is a ghost node, i.e. not known to the configuration (config)
1182 @type os_fail: boolean
1183 @ivar os_fail: whether the RPC call didn't return valid OS data
1185 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1188 def __init__(self, offline=False, name=None):
1197 self.offline = offline
1198 self.rpc_fail = False
1199 self.lvm_fail = False
1200 self.hyp_fail = False
1202 self.os_fail = False
1205 def ExpandNames(self):
1206 self.needed_locks = {
1207 locking.LEVEL_NODE: locking.ALL_SET,
1208 locking.LEVEL_INSTANCE: locking.ALL_SET,
1210 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1212 def _Error(self, ecode, item, msg, *args, **kwargs):
1213 """Format an error message.
1215 Based on the opcode's error_codes parameter, either format a
1216 parseable error code, or a simpler error string.
1218 This must be called only from Exec and functions called from Exec.
1221 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1223 # first complete the msg
1226 # then format the whole message
1227 if self.op.error_codes:
1228 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1234 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1235 # and finally report it via the feedback_fn
1236 self._feedback_fn(" - %s" % msg)
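# Illustrative sketch, not part of the original module: with error_codes set
# the line passed to feedback_fn looks like
#
#    - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
#
# while the default (non-parseable) format produced above is
#
#    - ERROR: node node1.example.com: unable to check volume groups
#
# (item spacing in the second form comes from the elided item handling).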
1238 def _ErrorIf(self, cond, *args, **kwargs):
1239 """Log an error message if the passed condition is True.
1242 cond = bool(cond) or self.op.debug_simulate_errors
1244 self._Error(*args, **kwargs)
1245 # do not mark the operation as failed for WARN cases only
1246 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1247 self.bad = self.bad or cond
1249 def _VerifyNode(self, ninfo, nresult):
1250 """Run multiple tests against a node.
1254 - compares ganeti version
1255 - checks vg existence and size > 20G
1256 - checks config file checksum
1257 - checks ssh to other nodes
1259 @type ninfo: L{objects.Node}
1260 @param ninfo: the node to check
1261 @param nresult: the results from the node
1263 @return: whether overall this call was successful (and we can expect
1264 reasonable values in the response)
1268 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1270 # main result, nresult should be a non-empty dict
1271 test = not nresult or not isinstance(nresult, dict)
1272 _ErrorIf(test, self.ENODERPC, node,
1273 "unable to verify node: no data returned")
1277 # compares ganeti version
1278 local_version = constants.PROTOCOL_VERSION
1279 remote_version = nresult.get("version", None)
1280 test = not (remote_version and
1281 isinstance(remote_version, (list, tuple)) and
1282 len(remote_version) == 2)
1283 _ErrorIf(test, self.ENODERPC, node,
1284 "connection to node returned invalid data")
1288 test = local_version != remote_version[0]
1289 _ErrorIf(test, self.ENODEVERSION, node,
1290 "incompatible protocol versions: master %s,"
1291 " node %s", local_version, remote_version[0])
1295 # node seems compatible, we can actually try to look into its results
1297 # full package version
1298 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1299 self.ENODEVERSION, node,
1300 "software version mismatch: master %s, node %s",
1301 constants.RELEASE_VERSION, remote_version[1],
1302 code=self.ETYPE_WARNING)
1304 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1305 if isinstance(hyp_result, dict):
1306 for hv_name, hv_result in hyp_result.iteritems():
1307 test = hv_result is not None
1308 _ErrorIf(test, self.ENODEHV, node,
1309 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1312 test = nresult.get(constants.NV_NODESETUP,
1313 ["Missing NODESETUP results"])
1314 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1319 def _VerifyNodeTime(self, ninfo, nresult,
1320 nvinfo_starttime, nvinfo_endtime):
1321 """Check the node time.
1323 @type ninfo: L{objects.Node}
1324 @param ninfo: the node to check
1325 @param nresult: the remote results for the node
1326 @param nvinfo_starttime: the start time of the RPC call
1327 @param nvinfo_endtime: the end time of the RPC call
1331 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1333 ntime = nresult.get(constants.NV_TIME, None)
1335 ntime_merged = utils.MergeTime(ntime)
1336 except (ValueError, TypeError):
1337 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1340 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1341 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1342 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1343 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1347 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1348 "Node time diverges by at least %s from master node time",
1351 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1352 """Check the node time.
1354 @type ninfo: L{objects.Node}
1355 @param ninfo: the node to check
1356 @param nresult: the remote results for the node
1357 @param vg_name: the configured VG name
1364 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1366 # checks vg existence and size > 20G
1367 vglist = nresult.get(constants.NV_VGLIST, None)
1369 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1371 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1372 constants.MIN_VG_SIZE)
1373 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1376 pvlist = nresult.get(constants.NV_PVLIST, None)
1377 test = pvlist is None
1378 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1380 # check that ':' is not present in PV names, since it's a
1381 # special character for lvcreate (denotes the range of PEs to
1383 for _, pvname, owner_vg in pvlist:
1384 test = ":" in pvname
1385 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1386 " '%s' of VG '%s'", pvname, owner_vg)
1388 def _VerifyNodeNetwork(self, ninfo, nresult):
1389 """Check the node time.
1391 @type ninfo: L{objects.Node}
1392 @param ninfo: the node to check
1393 @param nresult: the remote results for the node
1397 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1399 test = constants.NV_NODELIST not in nresult
1400 _ErrorIf(test, self.ENODESSH, node,
1401 "node hasn't returned node ssh connectivity data")
1403 if nresult[constants.NV_NODELIST]:
1404 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1405 _ErrorIf(True, self.ENODESSH, node,
1406 "ssh communication with node '%s': %s", a_node, a_msg)
1408 test = constants.NV_NODENETTEST not in nresult
1409 _ErrorIf(test, self.ENODENET, node,
1410 "node hasn't returned node tcp connectivity data")
1412 if nresult[constants.NV_NODENETTEST]:
1413 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1415 _ErrorIf(True, self.ENODENET, node,
1416 "tcp communication with node '%s': %s",
1417 anode, nresult[constants.NV_NODENETTEST][anode])
1419 test = constants.NV_MASTERIP not in nresult
1420 _ErrorIf(test, self.ENODENET, node,
1421 "node hasn't returned node master IP reachability data")
1423 if not nresult[constants.NV_MASTERIP]:
1424 if node == self.master_node:
1425 msg = "the master node cannot reach the master IP (not configured?)"
1427 msg = "cannot reach the master IP"
1428 _ErrorIf(True, self.ENODENET, node, msg)
1431 def _VerifyInstance(self, instance, instanceconfig, node_image):
1432 """Verify an instance.
1434 This function checks to see if the required block devices are
1435 available on the instance's node.
1438 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1439 node_current = instanceconfig.primary_node
1441 node_vol_should = {}
1442 instanceconfig.MapLVsByNode(node_vol_should)
1444 for node in node_vol_should:
1445 n_img = node_image[node]
1446 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1447 # ignore missing volumes on offline or broken nodes
1449 for volume in node_vol_should[node]:
1450 test = volume not in n_img.volumes
1451 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1452 "volume %s missing on node %s", volume, node)
1454 if instanceconfig.admin_up:
1455 pri_img = node_image[node_current]
1456 test = instance not in pri_img.instances and not pri_img.offline
1457 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1458 "instance not running on its primary node %s",
1461 for node, n_img in node_image.items():
1462 if node != node_current:
1463 test = instance in n_img.instances
1464 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1465 "instance should not run on node %s", node)
1467 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1468 """Verify if there are any unknown volumes in the cluster.
1470 The .os, .swap and backup volumes are ignored. All other volumes are
1471 reported as unknown.
1474 for node, n_img in node_image.items():
1475 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1476 # skip non-healthy nodes
1478 for volume in n_img.volumes:
1479 test = (node not in node_vol_should or
1480 volume not in node_vol_should[node])
1481 self._ErrorIf(test, self.ENODEORPHANLV, node,
1482 "volume %s is unknown", volume)
1484 def _VerifyOrphanInstances(self, instancelist, node_image):
1485 """Verify the list of running instances.
1487 This checks what instances are running but unknown to the cluster.
1490 for node, n_img in node_image.items():
1491 for o_inst in n_img.instances:
1492 test = o_inst not in instancelist
1493 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1494 "instance %s on node %s should not exist", o_inst, node)
1496 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1497 """Verify N+1 Memory Resilience.
1499 Check that if a single node dies we can still start all the
1500 instances it was primary for.
1503 for node, n_img in node_image.items():
1504 # This code checks that every node which is now listed as
1505 # secondary has enough memory to host all instances it is
1506 # supposed to should a single other node in the cluster fail.
1507 # FIXME: not ready for failover to an arbitrary node
1508 # FIXME: does not support file-backed instances
1509 # WARNING: we currently take into account down instances as well
1510 # as up ones, considering that even if they're down someone
1511 # might want to start them even in the event of a node failure.
1512 for prinode, instances in n_img.sbp.items():
1514 for instance in instances:
1515 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1516 if bep[constants.BE_AUTO_BALANCE]:
1517 needed_mem += bep[constants.BE_MEMORY]
1518 test = n_img.mfree < needed_mem
1519 self._ErrorIf(test, self.ENODEN1, node,
1520 "not enough memory on to accommodate"
1521 " failovers should peer node %s fail", prinode)
1523 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1525 """Verifies and computes the node required file checksums.
1527 @type ninfo: L{objects.Node}
1528 @param ninfo: the node to check
1529 @param nresult: the remote results for the node
1530 @param file_list: required list of files
1531 @param local_cksum: dictionary of local files and their checksums
1532 @param master_files: list of files that only masters should have
1536 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1538 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1539 test = not isinstance(remote_cksum, dict)
1540 _ErrorIf(test, self.ENODEFILECHECK, node,
1541 "node hasn't returned file checksum data")
1545 for file_name in file_list:
1546 node_is_mc = ninfo.master_candidate
1547 must_have = (file_name not in master_files) or node_is_mc
1549 test1 = file_name not in remote_cksum
1551 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1553 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1554 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1555 "file '%s' missing", file_name)
1556 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1557 "file '%s' has wrong checksum", file_name)
1558 # not candidate and this is not a must-have file
1559 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1560 "file '%s' should not exist on non master"
1561 " candidates (and the file is outdated)", file_name)
1562 # all good, except non-master/non-must have combination
1563 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1564 "file '%s' should not exist"
1565 " on non master candidates", file_name)
1567 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1568 """Verifies and the node DRBD status.
1570 @type ninfo: L{objects.Node}
1571 @param ninfo: the node to check
1572 @param nresult: the remote results for the node
1573 @param instanceinfo: the dict of instances
1574 @param drbd_map: the DRBD map as returned by
1575 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1579 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1581 # compute the DRBD minors
1583 for minor, instance in drbd_map[node].items():
1584 test = instance not in instanceinfo
1585 _ErrorIf(test, self.ECLUSTERCFG, None,
1586 "ghost instance '%s' in temporary DRBD map", instance)
1587 # ghost instance should not be running, but otherwise we
1588 # don't give double warnings (both ghost instance and
1589 # unallocated minor in use)
1591 node_drbd[minor] = (instance, False)
1593 instance = instanceinfo[instance]
1594 node_drbd[minor] = (instance.name, instance.admin_up)
1596 # and now check them
1597 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1598 test = not isinstance(used_minors, (tuple, list))
1599 _ErrorIf(test, self.ENODEDRBD, node,
1600 "cannot parse drbd status file: %s", str(used_minors))
1602 # we cannot check drbd status
1605 for minor, (iname, must_exist) in node_drbd.items():
1606 test = minor not in used_minors and must_exist
1607 _ErrorIf(test, self.ENODEDRBD, node,
1608 "drbd minor %d of instance %s is not active", minor, iname)
1609 for minor in used_minors:
1610 test = minor not in node_drbd
1611 _ErrorIf(test, self.ENODEDRBD, node,
1612 "unallocated drbd minor %d is in use", minor)
1614 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1615 """Builds the node OS structures.
1617 @type ninfo: L{objects.Node}
1618 @param ninfo: the node to check
1619 @param nresult: the remote results for the node
1620 @param nimg: the node image object
1624 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1626 remote_os = nresult.get(constants.NV_OSLIST, None)
1627 test = (not isinstance(remote_os, list) or
1628 not compat.all(remote_os,
1629 lambda v: isinstance(v, list) and len(v) == 7))
1631 _ErrorIf(test, self.ENODEOS, node,
1632 "node hasn't returned valid OS data")
1641 for (name, os_path, status, diagnose,
1642 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1644 if name not in os_dict:
1647 # parameters is a list of lists instead of list of tuples due to
1648 # JSON lacking a real tuple type, fix it:
1649 parameters = [tuple(v) for v in parameters]
1650 os_dict[name].append((os_path, status, diagnose,
1651 set(variants), set(parameters), set(api_ver)))
1653 nimg.oslist = os_dict
1655 def _VerifyNodeOS(self, ninfo, nimg, base):
1656 """Verifies the node OS list.
1658 @type ninfo: L{objects.Node}
1659 @param ninfo: the node to check
1660 @param nimg: the node image object
1661 @param base: the 'template' node we match against (e.g. from the master)
1665 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1667 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1669 for os_name, os_data in nimg.oslist.items():
1670 assert os_data, "Empty OS status for OS %s?!" % os_name
1671 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1672 _ErrorIf(not f_status, self.ENODEOS, node,
1673 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1674 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1675 "OS '%s' has multiple entries (first one shadows the rest): %s",
1676 os_name, utils.CommaJoin([v[0] for v in os_data]))
1677 # this will be caught in the backend too
1678 _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
1679 and not f_var, self.ENODEOS, node,
1680 "OS %s with API at least %d does not declare any variant",
1681 os_name, constants.OS_API_V15)
1682 # comparisons with the 'base' image
1683 test = os_name not in base.oslist
1684 _ErrorIf(test, self.ENODEOS, node,
1685 "Extra OS %s not present on reference node (%s)",
1689 assert base.oslist[os_name], "Base node has empty OS status?"
1690 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1692 # base OS is invalid, skipping
1694 for kind, a, b in [("API version", f_api, b_api),
1695 ("variants list", f_var, b_var),
1696 ("parameters", f_param, b_param)]:
1697 _ErrorIf(a != b, self.ENODEOS, node,
1698 "OS %s %s differs from reference node %s: %s vs. %s",
1699 kind, os_name, base.name,
1700 utils.CommaJoin(a), utils.CommaJoin(b))
1702 # check any missing OSes
1703 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1704 _ErrorIf(missing, self.ENODEOS, node,
1705 "OSes present on reference node %s but missing on this node: %s",
1706 base.name, utils.CommaJoin(missing))
1708 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1709 """Verifies and updates the node volume data.
1711 This function will update a L{NodeImage}'s internal structures
1712 with data from the remote call.
1714 @type ninfo: L{objects.Node}
1715 @param ninfo: the node to check
1716 @param nresult: the remote results for the node
1717 @param nimg: the node image object
1718 @param vg_name: the configured VG name
1722 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1724 nimg.lvm_fail = True
1725 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1728 elif isinstance(lvdata, basestring):
1729 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1730 utils.SafeEncode(lvdata))
1731 elif not isinstance(lvdata, dict):
1732 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1734 nimg.volumes = lvdata
1735 nimg.lvm_fail = False
1737 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1738 """Verifies and updates the node instance list.
1740 If the listing was successful, then updates this node's instance
1741 list. Otherwise, it marks the RPC call as failed for the instance
1744 @type ninfo: L{objects.Node}
1745 @param ninfo: the node to check
1746 @param nresult: the remote results for the node
1747 @param nimg: the node image object
1750 idata = nresult.get(constants.NV_INSTANCELIST, None)
1751 test = not isinstance(idata, list)
1752 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1753 " (instancelist): %s", utils.SafeEncode(str(idata)))
1755 nimg.hyp_fail = True
1757 nimg.instances = idata
1759 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1760 """Verifies and computes a node information map
1762 @type ninfo: L{objects.Node}
1763 @param ninfo: the node to check
1764 @param nresult: the remote results for the node
1765 @param nimg: the node image object
1766 @param vg_name: the configured VG name
1770 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1772 # try to read free memory (from the hypervisor)
1773 hv_info = nresult.get(constants.NV_HVINFO, None)
1774 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1775 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1778 nimg.mfree = int(hv_info["memory_free"])
1779 except (ValueError, TypeError):
1780 _ErrorIf(True, self.ENODERPC, node,
1781 "node returned invalid nodeinfo, check hypervisor")
1783 # FIXME: devise a free space model for file based instances as well
1784 if vg_name is not None:
1785 test = (constants.NV_VGLIST not in nresult or
1786 vg_name not in nresult[constants.NV_VGLIST])
1787 _ErrorIf(test, self.ENODELVM, node,
1788 "node didn't return data for the volume group '%s'"
1789 " - it is either missing or broken", vg_name)
1792 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1793 except (ValueError, TypeError):
1794 _ErrorIf(True, self.ENODERPC, node,
1795 "node returned invalid LVM info, check LVM status")
1797 def CheckPrereq(self):
1798 """Check prerequisites.
1800 Transform the list of checks we're going to skip into a set and check that
1801 all its members are valid.
1804 self.skip_set = frozenset(self.op.skip_checks)
1805 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1806 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1809 def BuildHooksEnv(self):
1812 Cluster-Verify hooks just run in the post phase; if they fail, their
1813 output is logged in the verify output and the verification fails.
1816 all_nodes = self.cfg.GetNodeList()
1818 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1820 for node in self.cfg.GetAllNodesInfo().values():
1821 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1823 return env, [], all_nodes
1825 def Exec(self, feedback_fn):
1826 """Verify integrity of cluster, performing various test on nodes.
1830 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1831 verbose = self.op.verbose
1832 self._feedback_fn = feedback_fn
1833 feedback_fn("* Verifying global settings")
1834 for msg in self.cfg.VerifyConfig():
1835 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1837 # Check the cluster certificates
1838 for cert_filename in constants.ALL_CERT_FILES:
1839 (errcode, msg) = _VerifyCertificate(cert_filename)
1840 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1842 vg_name = self.cfg.GetVGName()
1843 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1844 cluster = self.cfg.GetClusterInfo()
1845 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1846 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1847 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1848 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1849 for iname in instancelist)
1850 i_non_redundant = [] # Non redundant instances
1851 i_non_a_balanced = [] # Non auto-balanced instances
1852 n_offline = 0 # Count of offline nodes
1853 n_drained = 0 # Count of nodes being drained
1854 node_vol_should = {}
1856 # FIXME: verify OS list
1857 # do local checksums
1858 master_files = [constants.CLUSTER_CONF_FILE]
1859 master_node = self.master_node = self.cfg.GetMasterNode()
1860 master_ip = self.cfg.GetMasterIP()
1862 file_names = ssconf.SimpleStore().GetFileList()
1863 file_names.extend(constants.ALL_CERT_FILES)
1864 file_names.extend(master_files)
1865 if cluster.modify_etc_hosts:
1866 file_names.append(constants.ETC_HOSTS)
1868 local_checksums = utils.FingerprintFiles(file_names)
1870 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1871 node_verify_param = {
1872 constants.NV_FILELIST: file_names,
1873 constants.NV_NODELIST: [node.name for node in nodeinfo
1874 if not node.offline],
1875 constants.NV_HYPERVISOR: hypervisors,
1876 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1877 node.secondary_ip) for node in nodeinfo
1878 if not node.offline],
1879 constants.NV_INSTANCELIST: hypervisors,
1880 constants.NV_VERSION: None,
1881 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1882 constants.NV_NODESETUP: None,
1883 constants.NV_TIME: None,
1884 constants.NV_MASTERIP: (master_node, master_ip),
1885 constants.NV_OSLIST: None,
1888 if vg_name is not None:
1889 node_verify_param[constants.NV_VGLIST] = None
1890 node_verify_param[constants.NV_LVLIST] = vg_name
1891 node_verify_param[constants.NV_PVLIST] = [vg_name]
1892 node_verify_param[constants.NV_DRBDLIST] = None
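# (informational note) each NV_* key above selects a check that the node
# daemon runs via the node_verify RPC; the value carries that check's
# parameters (e.g. the files to fingerprint for NV_FILELIST, or the
# (name, primary_ip, secondary_ip) tuples for NV_NODENETTEST)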
1894 # Build our expected cluster state
1895 node_image = dict((node.name, self.NodeImage(offline=node.offline,
1897 for node in nodeinfo)
1899 for instance in instancelist:
1900 inst_config = instanceinfo[instance]
1902 for nname in inst_config.all_nodes:
1903 if nname not in node_image:
1904 # ghost node
1905 gnode = self.NodeImage(name=nname)
1906 gnode.ghost = True
1907 node_image[nname] = gnode
1909 inst_config.MapLVsByNode(node_vol_should)
1911 pnode = inst_config.primary_node
1912 node_image[pnode].pinst.append(instance)
1914 for snode in inst_config.secondary_nodes:
1915 nimg = node_image[snode]
1916 nimg.sinst.append(instance)
1917 if pnode not in nimg.sbp:
1918 nimg.sbp[pnode] = []
1919 nimg.sbp[pnode].append(instance)
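# (informational note) nimg.sbp maps a primary node name to the list of
# instances having that node as primary and this node as secondary
# ("secondaries by primary"); the N+1 memory check further down relies
# on this mapping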
1921 # At this point, we have the in-memory data structures complete,
1922 # except for the runtime information, which we'll gather next
1924 # Due to the way our RPC system works, exact response times cannot be
1925 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1926 # time before and after executing the request, we can at least have a time
1927 # window.
1928 nvinfo_starttime = time.time()
1929 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1930 self.cfg.GetClusterName())
1931 nvinfo_endtime = time.time()
1933 all_drbd_map = self.cfg.ComputeDRBDMap()
1935 feedback_fn("* Verifying node status")
1939 for node_i in nodeinfo:
1941 nimg = node_image[node]
1945 feedback_fn("* Skipping offline node %s" % (node,))
1949 if node == master_node:
1951 elif node_i.master_candidate:
1952 ntype = "master candidate"
1953 elif node_i.drained:
1959 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1961 msg = all_nvinfo[node].fail_msg
1962 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1964 nimg.rpc_fail = True
1967 nresult = all_nvinfo[node].payload
1969 nimg.call_ok = self._VerifyNode(node_i, nresult)
1970 self._VerifyNodeNetwork(node_i, nresult)
1971 self._VerifyNodeLVM(node_i, nresult, vg_name)
1972 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1974 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1975 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1977 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1978 self._UpdateNodeInstances(node_i, nresult, nimg)
1979 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1980 self._UpdateNodeOS(node_i, nresult, nimg)
1981 if not nimg.os_fail:
1982 if refos_img is None:
1984 self._VerifyNodeOS(node_i, nimg, refos_img)
1986 feedback_fn("* Verifying instance status")
1987 for instance in instancelist:
1989 feedback_fn("* Verifying instance %s" % instance)
1990 inst_config = instanceinfo[instance]
1991 self._VerifyInstance(instance, inst_config, node_image)
1992 inst_nodes_offline = []
1994 pnode = inst_config.primary_node
1995 pnode_img = node_image[pnode]
1996 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1997 self.ENODERPC, pnode, "instance %s, connection to"
1998 " primary node failed", instance)
2000 if pnode_img.offline:
2001 inst_nodes_offline.append(pnode)
2003 # If the instance is non-redundant we cannot survive losing its primary
2004 # node, so we are not N+1 compliant. On the other hand we have no disk
2005 # templates with more than one secondary so that situation is not well
2006 # supported either.
2007 # FIXME: does not support file-backed instances
2008 if not inst_config.secondary_nodes:
2009 i_non_redundant.append(instance)
2010 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2011 instance, "instance has multiple secondary nodes: %s",
2012 utils.CommaJoin(inst_config.secondary_nodes),
2013 code=self.ETYPE_WARNING)
2015 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2016 i_non_a_balanced.append(instance)
2018 for snode in inst_config.secondary_nodes:
2019 s_img = node_image[snode]
2020 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2021 "instance %s, connection to secondary node failed", instance)
2024 inst_nodes_offline.append(snode)
2026 # warn that the instance lives on offline nodes
2027 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2028 "instance lives on offline node(s) %s",
2029 utils.CommaJoin(inst_nodes_offline))
2030 # ... or ghost nodes
2031 for node in inst_config.all_nodes:
2032 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2033 "instance lives on ghost node %s", node)
2035 feedback_fn("* Verifying orphan volumes")
2036 self._VerifyOrphanVolumes(node_vol_should, node_image)
2038 feedback_fn("* Verifying orphan instances")
2039 self._VerifyOrphanInstances(instancelist, node_image)
2041 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
2042 feedback_fn("* Verifying N+1 Memory redundancy")
2043 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2045 feedback_fn("* Other Notes")
2047 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2048 % len(i_non_redundant))
2050 if i_non_a_balanced:
2051 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2052 % len(i_non_a_balanced))
2055 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2058 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2062 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2063 """Analyze the post-hooks' result
2065 This method analyses the hook result, handles it, and sends some
2066 nicely-formatted feedback back to the user.
2068 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2069 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2070 @param hooks_results: the results of the multi-node hooks rpc call
2071 @param feedback_fn: function used to send feedback back to the caller
2072 @param lu_result: previous Exec result
2073 @return: the new Exec result, based on the previous result
2077 # We only really run POST phase hooks, and are only interested in
2078 # their results
2079 if phase == constants.HOOKS_PHASE_POST:
2080 # Used to change hooks' output to proper indentation
2081 indent_re = re.compile('^', re.M)
2082 feedback_fn("* Hooks Results")
2083 assert hooks_results, "invalid result from hooks"
2085 for node_name in hooks_results:
2086 res = hooks_results[node_name]
2088 test = msg and not res.offline
2089 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2090 "Communication failure in hooks execution: %s", msg)
2091 if res.offline or msg:
2092 # No need to investigate payload if node is offline or gave an error.
2093 # override manually lu_result here as _ErrorIf only
2094 # overrides self.bad
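# (informational note) each res.payload entry is expected to be a
# (script, status, output) tuple, with status being one of the
# constants.HKR_* values tested below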
2097 for script, hkr, output in res.payload:
2098 test = hkr == constants.HKR_FAIL
2099 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2100 "Script %s failed, output:", script)
2102 output = indent_re.sub(' ', output)
2103 feedback_fn("%s" % output)
2109 class LUVerifyDisks(NoHooksLU):
2110 """Verifies the cluster disks status.
2116 def ExpandNames(self):
2117 self.needed_locks = {
2118 locking.LEVEL_NODE: locking.ALL_SET,
2119 locking.LEVEL_INSTANCE: locking.ALL_SET,
2121 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2123 def CheckPrereq(self):
2124 """Check prerequisites.
2126 This has no prerequisites.
2131 def Exec(self, feedback_fn):
2132 """Verify integrity of cluster disks.
2134 @rtype: tuple of three items
2135 @return: a tuple of (dict of node-to-node_error, list of instances
2136 which need activate-disks, dict of instance: (node, volume) for
2137 missing volumes)
2140 result = res_nodes, res_instances, res_missing = {}, [], {}
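# purely illustrative shape of the result, with made-up node/instance
# names:
#   ({"node2": "rpc failure"},    # nodes that could not be queried
#    ["instance3"],               # instances needing activate-disks
#    {"instance5": [("node1", "lv-name")]})  # instances with missing LVs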
2142 vg_name = self.cfg.GetVGName()
2143 nodes = utils.NiceSort(self.cfg.GetNodeList())
2144 instances = [self.cfg.GetInstanceInfo(name)
2145 for name in self.cfg.GetInstanceList()]
2148 for inst in instances:
2150 if (not inst.admin_up or
2151 inst.disk_template not in constants.DTS_NET_MIRROR):
2153 inst.MapLVsByNode(inst_lvs)
2154 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2155 for node, vol_list in inst_lvs.iteritems():
2156 for vol in vol_list:
2157 nv_dict[(node, vol)] = inst
2162 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2166 node_res = node_lvs[node]
2167 if node_res.offline:
2169 msg = node_res.fail_msg
2171 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2172 res_nodes[node] = msg
2175 lvs = node_res.payload
2176 for lv_name, (_, _, lv_online) in lvs.items():
2177 inst = nv_dict.pop((node, lv_name), None)
2178 if (not lv_online and inst is not None
2179 and inst.name not in res_instances):
2180 res_instances.append(inst.name)
2182 # any leftover items in nv_dict are missing LVs, let's arrange the
2183 # data better
2184 for key, inst in nv_dict.iteritems():
2185 if inst.name not in res_missing:
2186 res_missing[inst.name] = []
2187 res_missing[inst.name].append(key)
2192 class LURepairDiskSizes(NoHooksLU):
2193 """Verifies the cluster disks sizes.
2196 _OP_REQP = ["instances"]
2199 def CheckArguments(self):
2200 if not isinstance(self.op.instances, list):
2201 raise errors.OpPrereqError("Invalid argument type 'instances'",
2204 def ExpandNames(self):
2205 if self.op.instances:
2206 self.wanted_names = []
2207 for name in self.op.instances:
2208 full_name = _ExpandInstanceName(self.cfg, name)
2209 self.wanted_names.append(full_name)
2210 self.needed_locks = {
2211 locking.LEVEL_NODE: [],
2212 locking.LEVEL_INSTANCE: self.wanted_names,
2214 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2216 self.wanted_names = None
2217 self.needed_locks = {
2218 locking.LEVEL_NODE: locking.ALL_SET,
2219 locking.LEVEL_INSTANCE: locking.ALL_SET,
2221 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2223 def DeclareLocks(self, level):
2224 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2225 self._LockInstancesNodes(primary_only=True)
2227 def CheckPrereq(self):
2228 """Check prerequisites.
2230 This only checks the optional instance list against the existing names.
2233 if self.wanted_names is None:
2234 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2236 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2237 in self.wanted_names]
2239 def _EnsureChildSizes(self, disk):
2240 """Ensure children of the disk have the needed disk size.
2242 This is valid mainly for DRBD8 and fixes an issue where the
2243 children have smaller disk size.
2245 @param disk: an L{ganeti.objects.Disk} object
2248 if disk.dev_type == constants.LD_DRBD8:
2249 assert disk.children, "Empty children for DRBD8?"
2250 fchild = disk.children[0]
2251 mismatch = fchild.size < disk.size
2253 self.LogInfo("Child disk has size %d, parent %d, fixing",
2254 fchild.size, disk.size)
2255 fchild.size = disk.size
2257 # and we recurse on this child only, not on the metadev
2258 return self._EnsureChildSizes(fchild) or mismatch
2262 def Exec(self, feedback_fn):
2263 """Verify the size of cluster disks.
2266 # TODO: check child disks too
2267 # TODO: check differences in size between primary/secondary nodes
2269 for instance in self.wanted_instances:
2270 pnode = instance.primary_node
2271 if pnode not in per_node_disks:
2272 per_node_disks[pnode] = []
2273 for idx, disk in enumerate(instance.disks):
2274 per_node_disks[pnode].append((instance, idx, disk))
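# (informational note) per_node_disks groups disks by primary node as
# (instance, disk index, disk object) tuples, so that the actual sizes
# can be fetched with a single blockdev_getsizes RPC per node below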
2277 for node, dskl in per_node_disks.items():
2278 newl = [v[2].Copy() for v in dskl]
2280 self.cfg.SetDiskID(dsk, node)
2281 result = self.rpc.call_blockdev_getsizes(node, newl)
2283 self.LogWarning("Failure in blockdev_getsizes call to node"
2284 " %s, ignoring", node)
2286 if len(result.data) != len(dskl):
2287 self.LogWarning("Invalid result from node %s, ignoring node results",
2290 for ((instance, idx, disk), size) in zip(dskl, result.data):
2292 self.LogWarning("Disk %d of instance %s did not return size"
2293 " information, ignoring", idx, instance.name)
2295 if not isinstance(size, (int, long)):
2296 self.LogWarning("Disk %d of instance %s did not return valid"
2297 " size information, ignoring", idx, instance.name)
2300 if size != disk.size:
2301 self.LogInfo("Disk %d of instance %s has mismatched size,"
2302 " correcting: recorded %d, actual %d", idx,
2303 instance.name, disk.size, size)
2305 self.cfg.Update(instance, feedback_fn)
2306 changed.append((instance.name, idx, size))
2307 if self._EnsureChildSizes(disk):
2308 self.cfg.Update(instance, feedback_fn)
2309 changed.append((instance.name, idx, disk.size))
2313 class LURenameCluster(LogicalUnit):
2314 """Rename the cluster.
2317 HPATH = "cluster-rename"
2318 HTYPE = constants.HTYPE_CLUSTER
2321 def BuildHooksEnv(self):
2326 "OP_TARGET": self.cfg.GetClusterName(),
2327 "NEW_NAME": self.op.name,
2329 mn = self.cfg.GetMasterNode()
2330 all_nodes = self.cfg.GetNodeList()
2331 return env, [mn], all_nodes
2333 def CheckPrereq(self):
2334 """Verify that the passed name is a valid one.
2337 hostname = utils.GetHostInfo(self.op.name)
2339 new_name = hostname.name
2340 self.ip = new_ip = hostname.ip
2341 old_name = self.cfg.GetClusterName()
2342 old_ip = self.cfg.GetMasterIP()
2343 if new_name == old_name and new_ip == old_ip:
2344 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2345 " cluster has changed",
2347 if new_ip != old_ip:
2348 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2349 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2350 " reachable on the network. Aborting." %
2351 new_ip, errors.ECODE_NOTUNIQUE)
2353 self.op.name = new_name
2355 def Exec(self, feedback_fn):
2356 """Rename the cluster.
2359 clustername = self.op.name
2362 # shutdown the master IP
2363 master = self.cfg.GetMasterNode()
2364 result = self.rpc.call_node_stop_master(master, False)
2365 result.Raise("Could not disable the master role")
2368 cluster = self.cfg.GetClusterInfo()
2369 cluster.cluster_name = clustername
2370 cluster.master_ip = ip
2371 self.cfg.Update(cluster, feedback_fn)
2373 # update the known hosts file
2374 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2375 node_list = self.cfg.GetNodeList()
2377 node_list.remove(master)
2380 result = self.rpc.call_upload_file(node_list,
2381 constants.SSH_KNOWN_HOSTS_FILE)
2382 for to_node, to_result in result.iteritems():
2383 msg = to_result.fail_msg
2385 msg = ("Copy of file %s to node %s failed: %s" %
2386 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2387 self.proc.LogWarning(msg)
2390 result = self.rpc.call_node_start_master(master, False, False)
2391 msg = result.fail_msg
2393 self.LogWarning("Could not re-enable the master role on"
2394 " the master, please restart manually: %s", msg)
2397 def _RecursiveCheckIfLVMBased(disk):
2398 """Check if the given disk or its children are lvm-based.
2400 @type disk: L{objects.Disk}
2401 @param disk: the disk to check
2403 @return: boolean indicating whether a LD_LV dev_type was found or not
2407 for chdisk in disk.children:
2408 if _RecursiveCheckIfLVMBased(chdisk):
2410 return disk.dev_type == constants.LD_LV
2413 class LUSetClusterParams(LogicalUnit):
2414 """Change the parameters of the cluster.
2417 HPATH = "cluster-modify"
2418 HTYPE = constants.HTYPE_CLUSTER
2421 ("candidate_pool_size", None),
2424 ("remove_uids", None),
2428 def CheckArguments(self):
2432 if self.op.candidate_pool_size is not None:
2434 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2435 except (ValueError, TypeError), err:
2436 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2437 str(err), errors.ECODE_INVAL)
2438 if self.op.candidate_pool_size < 1:
2439 raise errors.OpPrereqError("At least one master candidate needed",
2442 _CheckBooleanOpField(self.op, "maintain_node_health")
2444 if self.op.uid_pool:
2445 uidpool.CheckUidPool(self.op.uid_pool)
2447 if self.op.add_uids:
2448 uidpool.CheckUidPool(self.op.add_uids)
2450 if self.op.remove_uids:
2451 uidpool.CheckUidPool(self.op.remove_uids)
2453 def ExpandNames(self):
2454 # FIXME: in the future maybe other cluster params won't require checking on
2455 # all nodes to be modified.
2456 self.needed_locks = {
2457 locking.LEVEL_NODE: locking.ALL_SET,
2459 self.share_locks[locking.LEVEL_NODE] = 1
2461 def BuildHooksEnv(self):
2466 "OP_TARGET": self.cfg.GetClusterName(),
2467 "NEW_VG_NAME": self.op.vg_name,
2469 mn = self.cfg.GetMasterNode()
2470 return env, [mn], [mn]
2472 def CheckPrereq(self):
2473 """Check prerequisites.
2475 This checks that the given parameters do not conflict and that
2476 the given volume group is valid.
2479 if self.op.vg_name is not None and not self.op.vg_name:
2480 instances = self.cfg.GetAllInstancesInfo().values()
2481 for inst in instances:
2482 for disk in inst.disks:
2483 if _RecursiveCheckIfLVMBased(disk):
2484 raise errors.OpPrereqError("Cannot disable lvm storage while"
2485 " lvm-based instances exist",
2488 node_list = self.acquired_locks[locking.LEVEL_NODE]
2490 # if vg_name not None, checks given volume group on all nodes
2492 vglist = self.rpc.call_vg_list(node_list)
2493 for node in node_list:
2494 msg = vglist[node].fail_msg
2496 # ignoring down node
2497 self.LogWarning("Error while gathering data on node %s"
2498 " (ignoring node): %s", node, msg)
2500 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2502 constants.MIN_VG_SIZE)
2504 raise errors.OpPrereqError("Error on node '%s': %s" %
2505 (node, vgstatus), errors.ECODE_ENVIRON)
2507 self.cluster = cluster = self.cfg.GetClusterInfo()
2508 # validate params changes
2509 if self.op.beparams:
2510 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2511 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2513 if self.op.nicparams:
2514 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2515 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2516 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2519 # check all instances for consistency
2520 for instance in self.cfg.GetAllInstancesInfo().values():
2521 for nic_idx, nic in enumerate(instance.nics):
2522 params_copy = copy.deepcopy(nic.nicparams)
2523 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2525 # check parameter syntax
2527 objects.NIC.CheckParameterSyntax(params_filled)
2528 except errors.ConfigurationError, err:
2529 nic_errors.append("Instance %s, nic/%d: %s" %
2530 (instance.name, nic_idx, err))
2532 # if we're moving instances to routed, check that they have an ip
2533 target_mode = params_filled[constants.NIC_MODE]
2534 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2535 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2536 (instance.name, nic_idx))
2538 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2539 "\n".join(nic_errors))
2541 # hypervisor list/parameters
2542 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2543 if self.op.hvparams:
2544 if not isinstance(self.op.hvparams, dict):
2545 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2547 for hv_name, hv_dict in self.op.hvparams.items():
2548 if hv_name not in self.new_hvparams:
2549 self.new_hvparams[hv_name] = hv_dict
2551 self.new_hvparams[hv_name].update(hv_dict)
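# hypothetical example of the merge semantics above: passing
#   hvparams={"xen-pvm": {"root_path": "/dev/xvda1"}}
# only overrides root_path for xen-pvm; other xen-pvm parameters keep
# their current cluster-level values, since update() merges per key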
2553 # os hypervisor parameters
2554 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2556 if not isinstance(self.op.os_hvp, dict):
2557 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2559 for os_name, hvs in self.op.os_hvp.items():
2560 if not isinstance(hvs, dict):
2561 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2562 " input"), errors.ECODE_INVAL)
2563 if os_name not in self.new_os_hvp:
2564 self.new_os_hvp[os_name] = hvs
2566 for hv_name, hv_dict in hvs.items():
2567 if hv_name not in self.new_os_hvp[os_name]:
2568 self.new_os_hvp[os_name][hv_name] = hv_dict
2570 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2573 self.new_osp = objects.FillDict(cluster.osparams, {})
2574 if self.op.osparams:
2575 if not isinstance(self.op.osparams, dict):
2576 raise errors.OpPrereqError("Invalid 'osparams' parameter on input",
2578 for os_name, osp in self.op.osparams.items():
2579 if not isinstance(osp, dict):
2580 raise errors.OpPrereqError(("Invalid 'osparams' parameter on"
2581 " input"), errors.ECODE_INVAL)
2582 if os_name not in self.new_osp:
2583 self.new_osp[os_name] = {}
2585 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2588 if not self.new_osp[os_name]:
2589 # we removed all parameters
2590 del self.new_osp[os_name]
2592 # check the parameter validity (remote check)
2593 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2594 os_name, self.new_osp[os_name])
2596 # changes to the hypervisor list
2597 if self.op.enabled_hypervisors is not None:
2598 self.hv_list = self.op.enabled_hypervisors
2599 if not self.hv_list:
2600 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2601 " least one member",
2603 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2605 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2607 utils.CommaJoin(invalid_hvs),
2609 for hv in self.hv_list:
2610 # if the hypervisor doesn't already exist in the cluster
2611 # hvparams, we initialize it to empty, and then (in both
2612 # cases) we make sure to fill the defaults, as we might not
2613 # have a complete defaults list if the hypervisor wasn't
2614 # enabled before
2615 if hv not in new_hvp:
2617 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2618 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2620 self.hv_list = cluster.enabled_hypervisors
2622 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2623 # either the enabled list has changed, or the parameters have, validate
2624 for hv_name, hv_params in self.new_hvparams.items():
2625 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2626 (self.op.enabled_hypervisors and
2627 hv_name in self.op.enabled_hypervisors)):
2628 # either this is a new hypervisor, or its parameters have changed
2629 hv_class = hypervisor.GetHypervisor(hv_name)
2630 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2631 hv_class.CheckParameterSyntax(hv_params)
2632 _CheckHVParams(self, node_list, hv_name, hv_params)
2635 # no need to check any newly-enabled hypervisors, since the
2636 # defaults have already been checked in the above code-block
2637 for os_name, os_hvp in self.new_os_hvp.items():
2638 for hv_name, hv_params in os_hvp.items():
2639 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2640 # we need to fill in the new os_hvp on top of the actual hv_p
2641 cluster_defaults = self.new_hvparams.get(hv_name, {})
2642 new_osp = objects.FillDict(cluster_defaults, hv_params)
2643 hv_class = hypervisor.GetHypervisor(hv_name)
2644 hv_class.CheckParameterSyntax(new_osp)
2645 _CheckHVParams(self, node_list, hv_name, new_osp)
2648 def Exec(self, feedback_fn):
2649 """Change the parameters of the cluster.
2652 if self.op.vg_name is not None:
2653 new_volume = self.op.vg_name
2656 if new_volume != self.cfg.GetVGName():
2657 self.cfg.SetVGName(new_volume)
2659 feedback_fn("Cluster LVM configuration already in desired"
2660 " state, not changing")
2661 if self.op.hvparams:
2662 self.cluster.hvparams = self.new_hvparams
2664 self.cluster.os_hvp = self.new_os_hvp
2665 if self.op.enabled_hypervisors is not None:
2666 self.cluster.hvparams = self.new_hvparams
2667 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2668 if self.op.beparams:
2669 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2670 if self.op.nicparams:
2671 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2672 if self.op.osparams:
2673 self.cluster.osparams = self.new_osp
2675 if self.op.candidate_pool_size is not None:
2676 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2677 # we need to update the pool size here, otherwise the save will fail
2678 _AdjustCandidatePool(self, [])
2680 if self.op.maintain_node_health is not None:
2681 self.cluster.maintain_node_health = self.op.maintain_node_health
2683 if self.op.add_uids is not None:
2684 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2686 if self.op.remove_uids is not None:
2687 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2689 if self.op.uid_pool is not None:
2690 self.cluster.uid_pool = self.op.uid_pool
2692 self.cfg.Update(self.cluster, feedback_fn)
2695 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2696 """Distribute additional files which are part of the cluster configuration.
2698 ConfigWriter takes care of distributing the config and ssconf files, but
2699 there are more files which should be distributed to all nodes. This function
2700 makes sure those are copied.
2702 @param lu: calling logical unit
2703 @param additional_nodes: list of nodes not in the config to distribute to
2706 # 1. Gather target nodes
2707 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2708 dist_nodes = lu.cfg.GetOnlineNodeList()
2709 if additional_nodes is not None:
2710 dist_nodes.extend(additional_nodes)
2711 if myself.name in dist_nodes:
2712 dist_nodes.remove(myself.name)
2714 # 2. Gather files to distribute
2715 dist_files = set([constants.ETC_HOSTS,
2716 constants.SSH_KNOWN_HOSTS_FILE,
2717 constants.RAPI_CERT_FILE,
2718 constants.RAPI_USERS_FILE,
2719 constants.CONFD_HMAC_KEY,
2720 constants.CLUSTER_DOMAIN_SECRET_FILE,
2723 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2724 for hv_name in enabled_hypervisors:
2725 hv_class = hypervisor.GetHypervisor(hv_name)
2726 dist_files.update(hv_class.GetAncillaryFiles())
2728 # 3. Perform the files upload
2729 for fname in dist_files:
2730 if os.path.exists(fname):
2731 result = lu.rpc.call_upload_file(dist_nodes, fname)
2732 for to_node, to_result in result.items():
2733 msg = to_result.fail_msg
2735 msg = ("Copy of file %s to node %s failed: %s" %
2736 (fname, to_node, msg))
2737 lu.proc.LogWarning(msg)
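# (informational note) this helper is called from several LUs in this
# module (e.g. LURedistributeConfig.Exec below, and the node add/remove
# LUs) to keep /etc/hosts, known_hosts, certificates and hypervisor
# ancillary files in sync across all online nodes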
2740 class LURedistributeConfig(NoHooksLU):
2741 """Force the redistribution of cluster configuration.
2743 This is a very simple LU.
2749 def ExpandNames(self):
2750 self.needed_locks = {
2751 locking.LEVEL_NODE: locking.ALL_SET,
2753 self.share_locks[locking.LEVEL_NODE] = 1
2755 def CheckPrereq(self):
2756 """Check prerequisites.
2760 def Exec(self, feedback_fn):
2761 """Redistribute the configuration.
2764 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2765 _RedistributeAncillaryFiles(self)
2768 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2769 """Sleep and poll for an instance's disk to sync.
2772 if not instance.disks or disks is not None and not disks:
2775 disks = _ExpandCheckDisks(instance, disks)
2778 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2780 node = instance.primary_node
2783 lu.cfg.SetDiskID(dev, node)
2785 # TODO: Convert to utils.Retry
2788 degr_retries = 10 # in seconds, as we sleep 1 second each time
2792 cumul_degraded = False
2793 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2794 msg = rstats.fail_msg
2796 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2799 raise errors.RemoteError("Can't contact node %s for mirror data,"
2800 " aborting." % node)
2803 rstats = rstats.payload
2805 for i, mstat in enumerate(rstats):
2807 lu.LogWarning("Can't compute data for node %s/%s",
2808 node, disks[i].iv_name)
2811 cumul_degraded = (cumul_degraded or
2812 (mstat.is_degraded and mstat.sync_percent is None))
2813 if mstat.sync_percent is not None:
2815 if mstat.estimated_time is not None:
2816 rem_time = ("%s remaining (estimated)" %
2817 utils.FormatSeconds(mstat.estimated_time))
2818 max_time = mstat.estimated_time
2820 rem_time = "no time estimate"
2821 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2822 (disks[i].iv_name, mstat.sync_percent, rem_time))
2824 # if we're done but degraded, let's do a few small retries, to
2825 # make sure we see a stable and not transient situation; therefore
2826 # we force restart of the loop
2827 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2828 logging.info("Degraded disks found, %d retries left", degr_retries)
2836 time.sleep(min(60, max_time))
2839 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2840 return not cumul_degraded
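# hypothetical usage sketch (callers elsewhere in this module follow a
# similar pattern after creating or activating disks):
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks are degraded after sync")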
2843 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2844 """Check that mirrors are not degraded.
2846 The ldisk parameter, if True, will change the test from the
2847 is_degraded attribute (which represents overall non-ok status for
2848 the device(s)) to the ldisk (representing the local storage status).
2851 lu.cfg.SetDiskID(dev, node)
2855 if on_primary or dev.AssembleOnSecondary():
2856 rstats = lu.rpc.call_blockdev_find(node, dev)
2857 msg = rstats.fail_msg
2859 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2861 elif not rstats.payload:
2862 lu.LogWarning("Can't find disk on node %s", node)
2866 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2868 result = result and not rstats.payload.is_degraded
2871 for child in dev.children:
2872 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2877 class LUDiagnoseOS(NoHooksLU):
2878 """Logical unit for OS diagnose/query.
2881 _OP_REQP = ["output_fields", "names"]
2883 _FIELDS_STATIC = utils.FieldSet()
2884 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2885 "parameters", "api_versions")
2887 def CheckArguments(self):
2889 raise errors.OpPrereqError("Selective OS query not supported",
2892 _CheckOutputFields(static=self._FIELDS_STATIC,
2893 dynamic=self._FIELDS_DYNAMIC,
2894 selected=self.op.output_fields)
2896 def ExpandNames(self):
2897 # Lock all nodes, in shared mode
2898 # Temporary removal of locks, should be reverted later
2899 # TODO: reintroduce locks when they are lighter-weight
2900 self.needed_locks = {}
2901 #self.share_locks[locking.LEVEL_NODE] = 1
2902 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2904 def CheckPrereq(self):
2905 """Check prerequisites.
2910 def _DiagnoseByOS(rlist):
2911 """Remaps a per-node return list into a per-os per-node dictionary
2913 @param rlist: a map with node names as keys and OS objects as values
2916 @return: a dictionary with osnames as keys and as value another
2917 map, with nodes as keys and tuples of (path, status, diagnose,
2918 variants, parameters, api_versions) as values, eg::
2920 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2921 (/srv/..., False, "invalid api")],
2922 "node2": [(/srv/..., True, "", [], [])]}
2927 # we build here the list of nodes that didn't fail the RPC (at RPC
2928 # level), so that nodes with a non-responding node daemon don't
2929 # make all OSes invalid
2930 good_nodes = [node_name for node_name in rlist
2931 if not rlist[node_name].fail_msg]
2932 for node_name, nr in rlist.items():
2933 if nr.fail_msg or not nr.payload:
2935 for (name, path, status, diagnose, variants,
2936 params, api_versions) in nr.payload:
2937 if name not in all_os:
2938 # build a list of nodes for this os containing empty lists
2939 # for each node in node_list
2941 for nname in good_nodes:
2942 all_os[name][nname] = []
2943 # convert params from [name, help] to (name, help)
2944 params = [tuple(v) for v in params]
2945 all_os[name][node_name].append((path, status, diagnose,
2946 variants, params, api_versions))
2949 def Exec(self, feedback_fn):
2950 """Compute the list of OSes.
2953 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2954 node_data = self.rpc.call_os_diagnose(valid_nodes)
2955 pol = self._DiagnoseByOS(node_data)
2958 for os_name, os_data in pol.items():
2961 (variants, params, api_versions) = null_state = (set(), set(), set())
2962 for idx, osl in enumerate(os_data.values()):
2963 valid = bool(valid and osl and osl[0][1])
2965 (variants, params, api_versions) = null_state
2967 node_variants, node_params, node_api = osl[0][3:6]
2968 if idx == 0: # first entry
2969 variants = set(node_variants)
2970 params = set(node_params)
2971 api_versions = set(node_api)
2972 else: # keep consistency
2973 variants.intersection_update(node_variants)
2974 params.intersection_update(node_params)
2975 api_versions.intersection_update(node_api)
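# (informational note) after this loop, variants/params/api_versions
# hold only the values common to every node reporting this OS, so a
# feature is advertised only if all nodes support it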
2977 for field in self.op.output_fields:
2980 elif field == "valid":
2982 elif field == "node_status":
2983 # this is just a copy of the dict
2985 for node_name, nos_list in os_data.items():
2986 val[node_name] = nos_list
2987 elif field == "variants":
2988 val = list(variants)
2989 elif field == "parameters":
2991 elif field == "api_versions":
2992 val = list(api_versions)
2994 raise errors.ParameterError(field)
3001 class LURemoveNode(LogicalUnit):
3002 """Logical unit for removing a node.
3005 HPATH = "node-remove"
3006 HTYPE = constants.HTYPE_NODE
3007 _OP_REQP = ["node_name"]
3009 def BuildHooksEnv(self):
3012 This doesn't run on the target node in the pre phase as a failed
3013 node would then be impossible to remove.
3017 "OP_TARGET": self.op.node_name,
3018 "NODE_NAME": self.op.node_name,
3020 all_nodes = self.cfg.GetNodeList()
3022 all_nodes.remove(self.op.node_name)
3024 logging.warning("Node %s which is about to be removed not found"
3025 " in the all nodes list", self.op.node_name)
3026 return env, all_nodes, all_nodes
3028 def CheckPrereq(self):
3029 """Check prerequisites.
3032 - the node exists in the configuration
3033 - it does not have primary or secondary instances
3034 - it's not the master
3036 Any errors are signaled by raising errors.OpPrereqError.
3039 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3040 node = self.cfg.GetNodeInfo(self.op.node_name)
3041 assert node is not None
3043 instance_list = self.cfg.GetInstanceList()
3045 masternode = self.cfg.GetMasterNode()
3046 if node.name == masternode:
3047 raise errors.OpPrereqError("Node is the master node,"
3048 " you need to failover first.",
3051 for instance_name in instance_list:
3052 instance = self.cfg.GetInstanceInfo(instance_name)
3053 if node.name in instance.all_nodes:
3054 raise errors.OpPrereqError("Instance %s is still running on the node,"
3055 " please remove it first." % instance_name,
3057 self.op.node_name = node.name
3060 def Exec(self, feedback_fn):
3061 """Removes the node from the cluster.
3065 logging.info("Stopping the node daemon and removing configs from node %s",
3068 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3070 # Promote nodes to master candidate as needed
3071 _AdjustCandidatePool(self, exceptions=[node.name])
3072 self.context.RemoveNode(node.name)
3074 # Run post hooks on the node before it's removed
3075 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3077 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3079 # pylint: disable-msg=W0702
3080 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3082 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3083 msg = result.fail_msg
3085 self.LogWarning("Errors encountered on the remote node while leaving"
3086 " the cluster: %s", msg)
3088 # Remove node from our /etc/hosts
3089 if self.cfg.GetClusterInfo().modify_etc_hosts:
3090 # FIXME: this should be done via an rpc call to node daemon
3091 utils.RemoveHostFromEtcHosts(node.name)
3092 _RedistributeAncillaryFiles(self)
3095 class LUQueryNodes(NoHooksLU):
3096 """Logical unit for querying nodes.
3099 # pylint: disable-msg=W0142
3100 _OP_REQP = ["output_fields", "names", "use_locking"]
3103 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3104 "master_candidate", "offline", "drained"]
3106 _FIELDS_DYNAMIC = utils.FieldSet(
3108 "mtotal", "mnode", "mfree",
3110 "ctotal", "cnodes", "csockets",
3113 _FIELDS_STATIC = utils.FieldSet(*[
3114 "pinst_cnt", "sinst_cnt",
3115 "pinst_list", "sinst_list",
3116 "pip", "sip", "tags",
3118 "role"] + _SIMPLE_FIELDS
3121 def CheckArguments(self):
3122 _CheckOutputFields(static=self._FIELDS_STATIC,
3123 dynamic=self._FIELDS_DYNAMIC,
3124 selected=self.op.output_fields)
3126 def ExpandNames(self):
3127 self.needed_locks = {}
3128 self.share_locks[locking.LEVEL_NODE] = 1
3131 self.wanted = _GetWantedNodes(self, self.op.names)
3133 self.wanted = locking.ALL_SET
3135 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3136 self.do_locking = self.do_node_query and self.op.use_locking
3138 # if we don't request only static fields, we need to lock the nodes
3139 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3141 def CheckPrereq(self):
3142 """Check prerequisites.
3145 # The validation of the node list is done in the _GetWantedNodes,
3146 # if non-empty; if empty, there's no validation to do
3149 def Exec(self, feedback_fn):
3150 """Computes the list of nodes and their attributes.
3153 all_info = self.cfg.GetAllNodesInfo()
3155 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3156 elif self.wanted != locking.ALL_SET:
3157 nodenames = self.wanted
3158 missing = set(nodenames).difference(all_info.keys())
3160 raise errors.OpExecError(
3161 "Some nodes were removed before retrieving their data: %s" % missing)
3163 nodenames = all_info.keys()
3165 nodenames = utils.NiceSort(nodenames)
3166 nodelist = [all_info[name] for name in nodenames]
3168 # begin data gathering
3170 if self.do_node_query:
3172 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3173 self.cfg.GetHypervisorType())
3174 for name in nodenames:
3175 nodeinfo = node_data[name]
3176 if not nodeinfo.fail_msg and nodeinfo.payload:
3177 nodeinfo = nodeinfo.payload
3178 fn = utils.TryConvert
3180 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3181 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3182 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3183 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3184 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3185 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3186 "bootid": nodeinfo.get('bootid', None),
3187 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3188 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3191 live_data[name] = {}
3193 live_data = dict.fromkeys(nodenames, {})
3195 node_to_primary = dict([(name, set()) for name in nodenames])
3196 node_to_secondary = dict([(name, set()) for name in nodenames])
3198 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3199 "sinst_cnt", "sinst_list"))
3200 if inst_fields & frozenset(self.op.output_fields):
3201 inst_data = self.cfg.GetAllInstancesInfo()
3203 for inst in inst_data.values():
3204 if inst.primary_node in node_to_primary:
3205 node_to_primary[inst.primary_node].add(inst.name)
3206 for secnode in inst.secondary_nodes:
3207 if secnode in node_to_secondary:
3208 node_to_secondary[secnode].add(inst.name)
3210 master_node = self.cfg.GetMasterNode()
3212 # end data gathering
3215 for node in nodelist:
3217 for field in self.op.output_fields:
3218 if field in self._SIMPLE_FIELDS:
3219 val = getattr(node, field)
3220 elif field == "pinst_list":
3221 val = list(node_to_primary[node.name])
3222 elif field == "sinst_list":
3223 val = list(node_to_secondary[node.name])
3224 elif field == "pinst_cnt":
3225 val = len(node_to_primary[node.name])
3226 elif field == "sinst_cnt":
3227 val = len(node_to_secondary[node.name])
3228 elif field == "pip":
3229 val = node.primary_ip
3230 elif field == "sip":
3231 val = node.secondary_ip
3232 elif field == "tags":
3233 val = list(node.GetTags())
3234 elif field == "master":
3235 val = node.name == master_node
3236 elif self._FIELDS_DYNAMIC.Matches(field):
3237 val = live_data[node.name].get(field, None)
3238 elif field == "role":
3239 if node.name == master_node:
3241 elif node.master_candidate:
3250 raise errors.ParameterError(field)
3251 node_output.append(val)
3252 output.append(node_output)
3257 class LUQueryNodeVolumes(NoHooksLU):
3258 """Logical unit for getting volumes on node(s).
3261 _OP_REQP = ["nodes", "output_fields"]
3263 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3264 _FIELDS_STATIC = utils.FieldSet("node")
3266 def CheckArguments(self):
3267 _CheckOutputFields(static=self._FIELDS_STATIC,
3268 dynamic=self._FIELDS_DYNAMIC,
3269 selected=self.op.output_fields)
3271 def ExpandNames(self):
3272 self.needed_locks = {}
3273 self.share_locks[locking.LEVEL_NODE] = 1
3274 if not self.op.nodes:
3275 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3277 self.needed_locks[locking.LEVEL_NODE] = \
3278 _GetWantedNodes(self, self.op.nodes)
3280 def CheckPrereq(self):
3281 """Check prerequisites.
3283 This checks that the fields required are valid output fields.
3286 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3288 def Exec(self, feedback_fn):
3289 """Computes the list of nodes and their attributes.
3292 nodenames = self.nodes
3293 volumes = self.rpc.call_node_volumes(nodenames)
3295 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3296 in self.cfg.GetInstanceList()]
3298 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3301 for node in nodenames:
3302 nresult = volumes[node]
3305 msg = nresult.fail_msg
3307 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3310 node_vols = nresult.payload[:]
3311 node_vols.sort(key=lambda vol: vol['dev'])
3313 for vol in node_vols:
3315 for field in self.op.output_fields:
3318 elif field == "phys":
3322 elif field == "name":
3324 elif field == "size":
3325 val = int(float(vol['size']))
3326 elif field == "instance":
3328 if node not in lv_by_node[inst]:
3330 if vol['name'] in lv_by_node[inst][node]:
3336 raise errors.ParameterError(field)
3337 node_output.append(str(val))
3339 output.append(node_output)
3344 class LUQueryNodeStorage(NoHooksLU):
3345 """Logical unit for getting information on storage units on node(s).
3348 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3349 _OP_DEFS = [("name", None)]
3351 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3353 def CheckArguments(self):
3354 _CheckStorageType(self.op.storage_type)
3356 _CheckOutputFields(static=self._FIELDS_STATIC,
3357 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3358 selected=self.op.output_fields)
3360 def ExpandNames(self):
3361 self.needed_locks = {}
3362 self.share_locks[locking.LEVEL_NODE] = 1
3365 self.needed_locks[locking.LEVEL_NODE] = \
3366 _GetWantedNodes(self, self.op.nodes)
3368 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3370 def CheckPrereq(self):
3371 """Check prerequisites.
3373 This checks that the fields required are valid output fields.
3376 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3378 def Exec(self, feedback_fn):
3379 """Computes the list of nodes and their attributes.
3382 # Always get name to sort by
3383 if constants.SF_NAME in self.op.output_fields:
3384 fields = self.op.output_fields[:]
3386 fields = [constants.SF_NAME] + self.op.output_fields
3388 # Never ask for node or type as it's only known to the LU
3389 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3390 while extra in fields:
3391 fields.remove(extra)
3393 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3394 name_idx = field_idx[constants.SF_NAME]
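# illustrative example (field names assumed): with
# fields == ["name", "size", "used"], field_idx becomes
# {"name": 0, "size": 1, "used": 2} and name_idx == 0, letting the rows
# returned by each node be keyed by their name column below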
3396 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3397 data = self.rpc.call_storage_list(self.nodes,
3398 self.op.storage_type, st_args,
3399 self.op.name, fields)
3403 for node in utils.NiceSort(self.nodes):
3404 nresult = data[node]
3408 msg = nresult.fail_msg
3410 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3413 rows = dict([(row[name_idx], row) for row in nresult.payload])
3415 for name in utils.NiceSort(rows.keys()):
3420 for field in self.op.output_fields:
3421 if field == constants.SF_NODE:
3423 elif field == constants.SF_TYPE:
3424 val = self.op.storage_type
3425 elif field in field_idx:
3426 val = row[field_idx[field]]
3428 raise errors.ParameterError(field)
3437 class LUModifyNodeStorage(NoHooksLU):
3438 """Logical unit for modifying a storage volume on a node.
3441 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3444 def CheckArguments(self):
3445 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3447 _CheckStorageType(self.op.storage_type)
3449 def ExpandNames(self):
3450 self.needed_locks = {
3451 locking.LEVEL_NODE: self.op.node_name,
3454 def CheckPrereq(self):
3455 """Check prerequisites.
3458 storage_type = self.op.storage_type
3461 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3463 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3464 " modified" % storage_type,
3467 diff = set(self.op.changes.keys()) - modifiable
3469 raise errors.OpPrereqError("The following fields can not be modified for"
3470 " storage units of type '%s': %r" %
3471 (storage_type, list(diff)),
3474 def Exec(self, feedback_fn):
3475 """Computes the list of nodes and their attributes.
3478 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3479 result = self.rpc.call_storage_modify(self.op.node_name,
3480 self.op.storage_type, st_args,
3481 self.op.name, self.op.changes)
3482 result.Raise("Failed to modify storage unit '%s' on %s" %
3483 (self.op.name, self.op.node_name))
3486 class LUAddNode(LogicalUnit):
3487 """Logical unit for adding node to the cluster.
3491 HTYPE = constants.HTYPE_NODE
3492 _OP_REQP = ["node_name"]
3493 _OP_DEFS = [("secondary_ip", None)]
3495 def CheckArguments(self):
3496 # validate/normalize the node name
3497 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3499 def BuildHooksEnv(self):
3502 This will run on all nodes before, and on all nodes + the new node after.
3506 "OP_TARGET": self.op.node_name,
3507 "NODE_NAME": self.op.node_name,
3508 "NODE_PIP": self.op.primary_ip,
3509 "NODE_SIP": self.op.secondary_ip,
3511 nodes_0 = self.cfg.GetNodeList()
3512 nodes_1 = nodes_0 + [self.op.node_name, ]
3513 return env, nodes_0, nodes_1
3515 def CheckPrereq(self):
3516 """Check prerequisites.
3519 - the new node is not already in the config
3521 - its parameters (single/dual homed) match the cluster
3523 Any errors are signaled by raising errors.OpPrereqError.
3526 node_name = self.op.node_name
3529 dns_data = utils.GetHostInfo(node_name)
3531 node = dns_data.name
3532 primary_ip = self.op.primary_ip = dns_data.ip
3533 if self.op.secondary_ip is None:
3534 self.op.secondary_ip = primary_ip
3535 if not utils.IsValidIP(self.op.secondary_ip):
3536 raise errors.OpPrereqError("Invalid secondary IP given",
3538 secondary_ip = self.op.secondary_ip
3540 node_list = cfg.GetNodeList()
3541 if not self.op.readd and node in node_list:
3542 raise errors.OpPrereqError("Node %s is already in the configuration" %
3543 node, errors.ECODE_EXISTS)
3544 elif self.op.readd and node not in node_list:
3545 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3548 self.changed_primary_ip = False
3550 for existing_node_name in node_list:
3551 existing_node = cfg.GetNodeInfo(existing_node_name)
3553 if self.op.readd and node == existing_node_name:
3554 if existing_node.secondary_ip != secondary_ip:
3555 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3556 " address configuration as before",
3558 if existing_node.primary_ip != primary_ip:
3559 self.changed_primary_ip = True
3563 if (existing_node.primary_ip == primary_ip or
3564 existing_node.secondary_ip == primary_ip or
3565 existing_node.primary_ip == secondary_ip or
3566 existing_node.secondary_ip == secondary_ip):
3567 raise errors.OpPrereqError("New node ip address(es) conflict with"
3568 " existing node %s" % existing_node.name,
3569 errors.ECODE_NOTUNIQUE)
3571 # check that the type of the node (single versus dual homed) is the
3572 # same as for the master
3573 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3574 master_singlehomed = myself.secondary_ip == myself.primary_ip
3575 newbie_singlehomed = secondary_ip == primary_ip
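# (informational note) "single homed" here means primary_ip ==
# secondary_ip; a node can only be added if its homing setup matches the
# master's, e.g. a dual-homed node cannot join a single-homed cluster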
3576 if master_singlehomed != newbie_singlehomed:
3577 if master_singlehomed:
3578 raise errors.OpPrereqError("The master has no private ip but the"
3579 " new node has one",
3582 raise errors.OpPrereqError("The master has a private ip but the"
3583 " new node doesn't have one",
3586 # checks reachability
3587 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3588 raise errors.OpPrereqError("Node not reachable by ping",
3589 errors.ECODE_ENVIRON)
3591 if not newbie_singlehomed:
3592 # check reachability from my secondary ip to newbie's secondary ip
3593 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3594 source=myself.secondary_ip):
3595 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3596 " based ping to noded port",
3597 errors.ECODE_ENVIRON)
3604 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3607 self.new_node = self.cfg.GetNodeInfo(node)
3608 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3610 self.new_node = objects.Node(name=node,
3611 primary_ip=primary_ip,
3612 secondary_ip=secondary_ip,
3613 master_candidate=self.master_candidate,
3614 offline=False, drained=False)
3616 def Exec(self, feedback_fn):
3617 """Adds the new node to the cluster.
3620 new_node = self.new_node
3621 node = new_node.name
3623 # for re-adds, reset the offline/drained/master-candidate flags;
3624 # we need to reset here, otherwise offline would prevent RPC calls
3625 # later in the procedure; this also means that if the re-add
3626 # fails, we are left with a non-offlined, broken node
3628 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3629 self.LogInfo("Readding a node, the offline/drained flags were reset")
3630 # if we demote the node, we do cleanup later in the procedure
3631 new_node.master_candidate = self.master_candidate
3632 if self.changed_primary_ip:
3633 new_node.primary_ip = self.op.primary_ip
3635 # notify the user about any possible mc promotion
3636 if new_node.master_candidate:
3637 self.LogInfo("Node will be a master candidate")
3639 # check connectivity
3640 result = self.rpc.call_version([node])[node]
3641 result.Raise("Can't get version information from node %s" % node)
3642 if constants.PROTOCOL_VERSION == result.payload:
3643 logging.info("Communication to node %s fine, sw version %s match",
3644 node, result.payload)
3646 raise errors.OpExecError("Version mismatch master version %s,"
3647 " node version %s" %
3648 (constants.PROTOCOL_VERSION, result.payload))
3651 if self.cfg.GetClusterInfo().modify_ssh_setup:
3652 logging.info("Copy ssh key to node %s", node)
3653 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3655 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3656 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3660 keyarray.append(utils.ReadFile(i))
3662 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3663 keyarray[2], keyarray[3], keyarray[4],
3665 result.Raise("Cannot transfer ssh keys to the new node")
3667 # Add node to our /etc/hosts, and add key to known_hosts
3668 if self.cfg.GetClusterInfo().modify_etc_hosts:
3669 # FIXME: this should be done via an rpc call to node daemon
3670 utils.AddHostToEtcHosts(new_node.name)
3672 if new_node.secondary_ip != new_node.primary_ip:
3673 result = self.rpc.call_node_has_ip_address(new_node.name,
3674 new_node.secondary_ip)
3675 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3676 prereq=True, ecode=errors.ECODE_ENVIRON)
3677 if not result.payload:
3678 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3679 " you gave (%s). Please fix and re-run this"
3680 " command." % new_node.secondary_ip)
3682 node_verify_list = [self.cfg.GetMasterNode()]
3683 node_verify_param = {
3684 constants.NV_NODELIST: [node],
3685 # TODO: do a node-net-test as well?
3688 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3689 self.cfg.GetClusterName())
3690 for verifier in node_verify_list:
3691 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3692 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3694 for failed in nl_payload:
3695 feedback_fn("ssh/hostname verification failed"
3696 " (checking from %s): %s" %
3697 (verifier, nl_payload[failed]))
3698 raise errors.OpExecError("ssh/hostname verification failed.")
3701 _RedistributeAncillaryFiles(self)
3702 self.context.ReaddNode(new_node)
3703 # make sure we redistribute the config
3704 self.cfg.Update(new_node, feedback_fn)
3705 # and make sure the new node will not have old files around
3706 if not new_node.master_candidate:
3707 result = self.rpc.call_node_demote_from_mc(new_node.name)
3708 msg = result.fail_msg
3710 self.LogWarning("Node failed to demote itself from master"
3711 " candidate status: %s" % msg)
3713 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3714 self.context.AddNode(new_node, self.proc.GetECId())
3717 class LUSetNodeParams(LogicalUnit):
3718 """Modifies the parameters of a node.
3721 HPATH = "node-modify"
3722 HTYPE = constants.HTYPE_NODE
3723 _OP_REQP = ["node_name"]
3726 def CheckArguments(self):
3727 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3728 _CheckBooleanOpField(self.op, 'master_candidate')
3729 _CheckBooleanOpField(self.op, 'offline')
3730 _CheckBooleanOpField(self.op, 'drained')
3731 _CheckBooleanOpField(self.op, 'auto_promote')
3732 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3733 if all_mods.count(None) == 3:
3734 raise errors.OpPrereqError("Please pass at least one modification",
3736 if all_mods.count(True) > 1:
3737 raise errors.OpPrereqError("Can't set the node into more than one"
3738 " state at the same time",
3741 # Boolean value that tells us whether we're offlining or draining the node
3742 self.offline_or_drain = (self.op.offline == True or
3743 self.op.drained == True)
3744 self.deoffline_or_drain = (self.op.offline == False or
3745 self.op.drained == False)
3746 self.might_demote = (self.op.master_candidate == False or
3747 self.offline_or_drain)
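# (informational note) offlining or draining a node may implicitly
# demote it from master candidate, hence might_demote; lock_all below is
# then needed so the candidate pool can be adjusted across all nodes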
3749 self.lock_all = self.op.auto_promote and self.might_demote
3752 def ExpandNames(self):
3754 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3756 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3758 def BuildHooksEnv(self):
3761 This runs on the master node.
3765 "OP_TARGET": self.op.node_name,
3766 "MASTER_CANDIDATE": str(self.op.master_candidate),
3767 "OFFLINE": str(self.op.offline),
3768 "DRAINED": str(self.op.drained),
3770 nl = [self.cfg.GetMasterNode(),
3774 def CheckPrereq(self):
3775 """Check prerequisites.
3777 This only checks the instance list against the existing names.
3780 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3782 if (self.op.master_candidate is not None or
3783 self.op.drained is not None or
3784 self.op.offline is not None):
3785 # we can't change the master's node flags
3786 if self.op.node_name == self.cfg.GetMasterNode():
3787 raise errors.OpPrereqError("The master role can be changed"
3788 " only via masterfailover",
3792 if node.master_candidate and self.might_demote and not self.lock_all:
3793 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3794 # check if after removing the current node, we're missing master
3796 (mc_remaining, mc_should, _) = \
3797 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3798 if mc_remaining < mc_should:
3799 raise errors.OpPrereqError("Not enough master candidates, please"
3800 " pass auto_promote to allow promotion",
3803 if (self.op.master_candidate == True and
3804 ((node.offline and not self.op.offline == False) or
3805 (node.drained and not self.op.drained == False))):
3806 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3807 " to master_candidate" % node.name,
3810 # If we're being deofflined/drained, we'll MC ourself if needed
3811 if (self.deoffline_or_drain and not self.offline_or_drain and not
3812 self.op.master_candidate == True and not node.master_candidate):
3813 self.op.master_candidate = _DecideSelfPromotion(self)
3814 if self.op.master_candidate:
3815 self.LogInfo("Autopromoting node to master candidate")
3819 def Exec(self, feedback_fn):
3828 if self.op.offline is not None:
3829 node.offline = self.op.offline
3830 result.append(("offline", str(self.op.offline)))
3831 if self.op.offline == True:
3832 if node.master_candidate:
3833 node.master_candidate = False
3835 result.append(("master_candidate", "auto-demotion due to offline"))
3837 node.drained = False
3838 result.append(("drained", "clear drained status due to offline"))
3840 if self.op.master_candidate is not None:
3841 node.master_candidate = self.op.master_candidate
3843 result.append(("master_candidate", str(self.op.master_candidate)))
3844 if self.op.master_candidate == False:
3845 rrc = self.rpc.call_node_demote_from_mc(node.name)
3848 self.LogWarning("Node failed to demote itself: %s" % msg)
3850 if self.op.drained is not None:
3851 node.drained = self.op.drained
3852 result.append(("drained", str(self.op.drained)))
3853 if self.op.drained == True:
3854 if node.master_candidate:
3855 node.master_candidate = False
3857 result.append(("master_candidate", "auto-demotion due to drain"))
3858 rrc = self.rpc.call_node_demote_from_mc(node.name)
3861 self.LogWarning("Node failed to demote itself: %s" % msg)
3863 node.offline = False
3864 result.append(("offline", "clear offline status due to drain"))
3866 # we locked all nodes, so we adjust the candidate pool before updating this node
3868 _AdjustCandidatePool(self, [node.name])
3870 # this will trigger configuration file update, if needed
3871 self.cfg.Update(node, feedback_fn)
3873 # this will trigger job queue propagation or cleanup
3875 self.context.ReaddNode(node)
3880 class LUPowercycleNode(NoHooksLU):
3881 """Powercycles a node.
3884 _OP_REQP = ["node_name", "force"]
3887 def CheckArguments(self):
3888 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3889 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3890 raise errors.OpPrereqError("The node is the master and the force"
3891 " parameter was not set",
3894 def ExpandNames(self):
3895 """Locking for PowercycleNode.
3897 This is a last-resort option and shouldn't block on other
3898 jobs. Therefore, we grab no locks.
3901 self.needed_locks = {}
3903 def CheckPrereq(self):
3904 """Check prerequisites.
3906 This LU has no prereqs.
3911 def Exec(self, feedback_fn):
3915 result = self.rpc.call_node_powercycle(self.op.node_name,
3916 self.cfg.GetHypervisorType())
3917 result.Raise("Failed to schedule the reboot")
3918 return result.payload
3921 class LUQueryClusterInfo(NoHooksLU):
3922 """Query cluster configuration.
3928 def ExpandNames(self):
3929 self.needed_locks = {}
3931 def CheckPrereq(self):
3932 """No prerequisites needed for this LU.
3937 def Exec(self, feedback_fn):
3938 """Return cluster config.
3941 cluster = self.cfg.GetClusterInfo()
3944 # Filter just for enabled hypervisors
3945 for os_name, hv_dict in cluster.os_hvp.items():
3946 os_hvp[os_name] = {}
3947 for hv_name, hv_params in hv_dict.items():
3948 if hv_name in cluster.enabled_hypervisors:
3949 os_hvp[os_name][hv_name] = hv_params
3952 "software_version": constants.RELEASE_VERSION,
3953 "protocol_version": constants.PROTOCOL_VERSION,
3954 "config_version": constants.CONFIG_VERSION,
3955 "os_api_version": max(constants.OS_API_VERSIONS),
3956 "export_version": constants.EXPORT_VERSION,
3957 "architecture": (platform.architecture()[0], platform.machine()),
3958 "name": cluster.cluster_name,
3959 "master": cluster.master_node,
3960 "default_hypervisor": cluster.enabled_hypervisors[0],
3961 "enabled_hypervisors": cluster.enabled_hypervisors,
3962 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3963 for hypervisor_name in cluster.enabled_hypervisors]),
3965 "beparams": cluster.beparams,
3966 "osparams": cluster.osparams,
3967 "nicparams": cluster.nicparams,
3968 "candidate_pool_size": cluster.candidate_pool_size,
3969 "master_netdev": cluster.master_netdev,
3970 "volume_group_name": cluster.volume_group_name,
3971 "file_storage_dir": cluster.file_storage_dir,
3972 "maintain_node_health": cluster.maintain_node_health,
3973 "ctime": cluster.ctime,
3974 "mtime": cluster.mtime,
3975 "uuid": cluster.uuid,
3976 "tags": list(cluster.GetTags()),
3977 "uid_pool": cluster.uid_pool,
3983 class LUQueryConfigValues(NoHooksLU):
3984 """Return configuration values.
3989 _FIELDS_DYNAMIC = utils.FieldSet()
3990 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3993 def CheckArguments(self):
3994 _CheckOutputFields(static=self._FIELDS_STATIC,
3995 dynamic=self._FIELDS_DYNAMIC,
3996 selected=self.op.output_fields)
3998 def ExpandNames(self):
3999 self.needed_locks = {}
4001 def CheckPrereq(self):
4002 """No prerequisites.
4007 def Exec(self, feedback_fn):
4008 """Dump a representation of the cluster config to the standard output.
4012 for field in self.op.output_fields:
4013 if field == "cluster_name":
4014 entry = self.cfg.GetClusterName()
4015 elif field == "master_node":
4016 entry = self.cfg.GetMasterNode()
4017 elif field == "drain_flag":
4018 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4019 elif field == "watcher_pause":
4020 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4022 raise errors.ParameterError(field)
4023 values.append(entry)
4027 class LUActivateInstanceDisks(NoHooksLU):
4028 """Bring up an instance's disks.
4031 _OP_REQP = ["instance_name"]
4032 _OP_DEFS = [("ignore_size", False)]
4035 def ExpandNames(self):
4036 self._ExpandAndLockInstance()
4037 self.needed_locks[locking.LEVEL_NODE] = []
4038 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4040 def DeclareLocks(self, level):
4041 if level == locking.LEVEL_NODE:
4042 self._LockInstancesNodes()
4044 def CheckPrereq(self):
4045 """Check prerequisites.
4047 This checks that the instance is in the cluster.
4050 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4051 assert self.instance is not None, \
4052 "Cannot retrieve locked instance %s" % self.op.instance_name
4053 _CheckNodeOnline(self, self.instance.primary_node)
4055 def Exec(self, feedback_fn):
4056 """Activate the disks.
4059 disks_ok, disks_info = \
4060 _AssembleInstanceDisks(self, self.instance,
4061 ignore_size=self.op.ignore_size)
4063 raise errors.OpExecError("Cannot activate block devices")
4068 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4070 """Prepare the block devices for an instance.
4072 This sets up the block devices on all nodes.
4074 @type lu: L{LogicalUnit}
4075 @param lu: the logical unit on whose behalf we execute
4076 @type instance: L{objects.Instance}
4077 @param instance: the instance for whose disks we assemble
4078 @type disks: list of L{objects.Disk} or None
4079 @param disks: which disks to assemble (or all, if None)
4080 @type ignore_secondaries: boolean
4081 @param ignore_secondaries: if true, errors on secondary nodes
4082 won't result in an error return from the function
4083 @type ignore_size: boolean
4084 @param ignore_size: if true, the current known size of the disk
4085 will not be used during the disk activation, useful for cases
4086 when the size is wrong
4087 @return: a tuple (disks_ok, device_info); disks_ok is False if the
4088 operation failed, and device_info is a list of
4089 (host, instance_visible_name, node_visible_name) tuples mapping node devices to instance devices
4094 iname = instance.name
4095 disks = _ExpandCheckDisks(instance, disks)
4097 # With the two passes mechanism we try to reduce the window of
4098 # opportunity for the race condition of switching DRBD to primary
4099 # before handshaking occurred, but we do not eliminate it
4101 # The proper fix would be to wait (with some limits) until the
4102 # connection has been made and drbd transitions from WFConnection
4103 # into any other network-connected state (Connected, SyncTarget,
4106 # 1st pass, assemble on all nodes in secondary mode
4107 for inst_disk in disks:
4108 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4110 node_disk = node_disk.Copy()
4111 node_disk.UnsetSize()
4112 lu.cfg.SetDiskID(node_disk, node)
4113 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4114 msg = result.fail_msg
4116 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4117 " (is_primary=False, pass=1): %s",
4118 inst_disk.iv_name, node, msg)
4119 if not ignore_secondaries:
4122 # FIXME: race condition on drbd migration to primary
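# Clarifying note (added): pass 1 above brings every replica up in secondary
# (read-only) role; pass 2 below then promotes only the devices on the
# instance's primary node.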
4124 # 2nd pass, do only the primary node
4125 for inst_disk in disks:
4128 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4129 if node != instance.primary_node:
4132 node_disk = node_disk.Copy()
4133 node_disk.UnsetSize()
4134 lu.cfg.SetDiskID(node_disk, node)
4135 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4136 msg = result.fail_msg
4138 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4139 " (is_primary=True, pass=2): %s",
4140 inst_disk.iv_name, node, msg)
4143 dev_path = result.payload
4145 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4147 # leave the disks configured for the primary node
4148 # this is a workaround that would be better fixed by
4149 # improving the logical/physical id handling
4151 lu.cfg.SetDiskID(disk, instance.primary_node)
4153 return disks_ok, device_info
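# Illustrative note (added, not part of the original code): callers unpack the
# result as "disks_ok, device_info = _AssembleInstanceDisks(lu, instance)" and,
# as LUActivateInstanceDisks above does, raise OpExecError when disks_ok is
# False; each device_info entry is a (node, iv_name, device_path) tuple.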
4156 def _StartInstanceDisks(lu, instance, force):
4157 """Start the disks of an instance.
4160 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4161 ignore_secondaries=force)
4163 _ShutdownInstanceDisks(lu, instance)
4164 if force is not None and not force:
4165 lu.proc.LogWarning("", hint="If the message above refers to a"
4167 " you can retry the operation using '--force'.")
4168 raise errors.OpExecError("Disk consistency error")
4171 class LUDeactivateInstanceDisks(NoHooksLU):
4172 """Shutdown an instance's disks.
4175 _OP_REQP = ["instance_name"]
4178 def ExpandNames(self):
4179 self._ExpandAndLockInstance()
4180 self.needed_locks[locking.LEVEL_NODE] = []
4181 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4183 def DeclareLocks(self, level):
4184 if level == locking.LEVEL_NODE:
4185 self._LockInstancesNodes()
4187 def CheckPrereq(self):
4188 """Check prerequisites.
4190 This checks that the instance is in the cluster.
4193 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4194 assert self.instance is not None, \
4195 "Cannot retrieve locked instance %s" % self.op.instance_name
4197 def Exec(self, feedback_fn):
4198 """Deactivate the disks.
4201 instance = self.instance
4202 _SafeShutdownInstanceDisks(self, instance)
4205 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4206 """Shutdown block devices of an instance.
4208 This function checks if an instance is running before calling
4209 _ShutdownInstanceDisks.
4212 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4213 _ShutdownInstanceDisks(lu, instance, disks=disks)
4216 def _ExpandCheckDisks(instance, disks):
4217 """Return the instance disks selected by the disks list
4219 @type disks: list of L{objects.Disk} or None
4220 @param disks: selected disks
4221 @rtype: list of L{objects.Disk}
4222 @return: selected instance disks to act on
4226 return instance.disks
4228 if not set(disks).issubset(instance.disks):
4229 raise errors.ProgrammerError("Can only act on disks belonging to the"
4234 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4235 """Shutdown block devices of an instance.
4237 This does the shutdown on all nodes of the instance.
4239 If ignore_primary is false, errors on the primary node also count as a failure; otherwise they are only logged as warnings.
4244 disks = _ExpandCheckDisks(instance, disks)
4247 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4248 lu.cfg.SetDiskID(top_disk, node)
4249 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4250 msg = result.fail_msg
4252 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4253 disk.iv_name, node, msg)
4254 if not ignore_primary or node != instance.primary_node:
4259 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4260 """Checks if a node has enough free memory.
4262 This function checks if a given node has the needed amount of free
4263 memory. In case the node has less memory or we cannot get the
4264 information from the node, this function raises an OpPrereqError
4267 @type lu: C{LogicalUnit}
4268 @param lu: a logical unit from which we get configuration data
4270 @param node: the node to check
4271 @type reason: C{str}
4272 @param reason: string to use in the error message
4273 @type requested: C{int}
4274 @param requested: the amount of memory in MiB to check for
4275 @type hypervisor_name: C{str}
4276 @param hypervisor_name: the hypervisor to ask for memory stats
4277 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4278 we cannot check the node
4281 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4282 nodeinfo[node].Raise("Can't get data from node %s" % node,
4283 prereq=True, ecode=errors.ECODE_ENVIRON)
4284 free_mem = nodeinfo[node].payload.get('memory_free', None)
4285 if not isinstance(free_mem, int):
4286 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4287 " was '%s'" % (node, free_mem),
4288 errors.ECODE_ENVIRON)
4289 if requested > free_mem:
4290 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4291 " needed %s MiB, available %s MiB" %
4292 (node, reason, requested, free_mem),
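# Illustrative usage (added for clarity): LUStartupInstance below calls this as
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# where bep is the instance's filled BE parameter dict.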
4296 def _CheckNodesFreeDisk(lu, nodenames, requested):
4297 """Checks if nodes have enough free disk space in the default VG.
4299 This function checks if all given nodes have the needed amount of
4300 free disk. In case any node has less disk or we cannot get the
4301 information from the node, this function raises an OpPrereqError
4304 @type lu: C{LogicalUnit}
4305 @param lu: a logical unit from which we get configuration data
4306 @type nodenames: C{list}
4307 @param nodenames: the list of node names to check
4308 @type requested: C{int}
4309 @param requested: the amount of disk in MiB to check for
4310 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4311 we cannot check the node
4314 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4315 lu.cfg.GetHypervisorType())
4316 for node in nodenames:
4317 info = nodeinfo[node]
4318 info.Raise("Cannot get current information from node %s" % node,
4319 prereq=True, ecode=errors.ECODE_ENVIRON)
4320 vg_free = info.payload.get("vg_free", None)
4321 if not isinstance(vg_free, int):
4322 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4323 " result was '%s'" % (node, vg_free),
4324 errors.ECODE_ENVIRON)
4325 if requested > vg_free:
4326 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4327 " required %d MiB, available %d MiB" %
4328 (node, requested, vg_free),
4332 class LUStartupInstance(LogicalUnit):
4333 """Starts an instance.
4336 HPATH = "instance-start"
4337 HTYPE = constants.HTYPE_INSTANCE
4338 _OP_REQP = ["instance_name", "force"]
4340 ("beparams", _EmptyDict),
4341 ("hvparams", _EmptyDict),
4345 def ExpandNames(self):
4346 self._ExpandAndLockInstance()
4348 def BuildHooksEnv(self):
4351 This runs on master, primary and secondary nodes of the instance.
4355 "FORCE": self.op.force,
4357 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4358 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4361 def CheckPrereq(self):
4362 """Check prerequisites.
4364 This checks that the instance is in the cluster.
4367 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4368 assert self.instance is not None, \
4369 "Cannot retrieve locked instance %s" % self.op.instance_name
4372 if self.op.beparams:
4373 if not isinstance(self.op.beparams, dict):
4374 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4375 " dict" % (type(self.op.beparams), ),
4377 # fill the beparams dict
4378 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4381 if self.op.hvparams:
4382 if not isinstance(self.op.hvparams, dict):
4383 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4384 " dict" % (type(self.op.hvparams), ),
4387 # check hypervisor parameter syntax (locally)
4388 cluster = self.cfg.GetClusterInfo()
4389 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4390 filled_hvp = cluster.FillHV(instance)
4391 filled_hvp.update(self.op.hvparams)
4392 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4393 hv_type.CheckParameterSyntax(filled_hvp)
4394 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
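# Note (added): the syntax of the hvparams overrides is validated locally
# above; _CheckHVParams then verifies the resulting filled parameter dict
# against all of the instance's nodes.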
4396 _CheckNodeOnline(self, instance.primary_node)
4398 bep = self.cfg.GetClusterInfo().FillBE(instance)
4399 # check bridges existence
4400 _CheckInstanceBridgesExist(self, instance)
4402 remote_info = self.rpc.call_instance_info(instance.primary_node,
4404 instance.hypervisor)
4405 remote_info.Raise("Error checking node %s" % instance.primary_node,
4406 prereq=True, ecode=errors.ECODE_ENVIRON)
4407 if not remote_info.payload: # not running already
4408 _CheckNodeFreeMemory(self, instance.primary_node,
4409 "starting instance %s" % instance.name,
4410 bep[constants.BE_MEMORY], instance.hypervisor)
4412 def Exec(self, feedback_fn):
4413 """Start the instance.
4416 instance = self.instance
4417 force = self.op.force
4419 self.cfg.MarkInstanceUp(instance.name)
4421 node_current = instance.primary_node
4423 _StartInstanceDisks(self, instance, force)
4425 result = self.rpc.call_instance_start(node_current, instance,
4426 self.op.hvparams, self.op.beparams)
4427 msg = result.fail_msg
4429 _ShutdownInstanceDisks(self, instance)
4430 raise errors.OpExecError("Could not start instance: %s" % msg)
4433 class LURebootInstance(LogicalUnit):
4434 """Reboot an instance.
4437 HPATH = "instance-reboot"
4438 HTYPE = constants.HTYPE_INSTANCE
4439 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4440 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4443 def CheckArguments(self):
4444 if self.op.reboot_type not in constants.REBOOT_TYPES:
4445 raise errors.OpPrereqError("Invalid reboot type '%s', not one of %s" %
4446 (self.op.reboot_type,
4447 utils.CommaJoin(constants.REBOOT_TYPES)),
4450 def ExpandNames(self):
4451 self._ExpandAndLockInstance()
4453 def BuildHooksEnv(self):
4456 This runs on master, primary and secondary nodes of the instance.
4460 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4461 "REBOOT_TYPE": self.op.reboot_type,
4462 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4464 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4465 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4468 def CheckPrereq(self):
4469 """Check prerequisites.
4471 This checks that the instance is in the cluster.
4474 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4475 assert self.instance is not None, \
4476 "Cannot retrieve locked instance %s" % self.op.instance_name
4478 _CheckNodeOnline(self, instance.primary_node)
4480 # check bridges existence
4481 _CheckInstanceBridgesExist(self, instance)
4483 def Exec(self, feedback_fn):
4484 """Reboot the instance.
4487 instance = self.instance
4488 ignore_secondaries = self.op.ignore_secondaries
4489 reboot_type = self.op.reboot_type
4491 node_current = instance.primary_node
4493 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4494 constants.INSTANCE_REBOOT_HARD]:
4495 for disk in instance.disks:
4496 self.cfg.SetDiskID(disk, node_current)
4497 result = self.rpc.call_instance_reboot(node_current, instance,
4499 self.op.shutdown_timeout)
4500 result.Raise("Could not reboot instance")
4502 result = self.rpc.call_instance_shutdown(node_current, instance,
4503 self.op.shutdown_timeout)
4504 result.Raise("Could not shutdown instance for full reboot")
4505 _ShutdownInstanceDisks(self, instance)
4506 _StartInstanceDisks(self, instance, ignore_secondaries)
4507 result = self.rpc.call_instance_start(node_current, instance, None, None)
4508 msg = result.fail_msg
4510 _ShutdownInstanceDisks(self, instance)
4511 raise errors.OpExecError("Could not start instance for"
4512 " full reboot: %s" % msg)
4514 self.cfg.MarkInstanceUp(instance.name)
4517 class LUShutdownInstance(LogicalUnit):
4518 """Shutdown an instance.
4521 HPATH = "instance-stop"
4522 HTYPE = constants.HTYPE_INSTANCE
4523 _OP_REQP = ["instance_name"]
4524 _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4527 def ExpandNames(self):
4528 self._ExpandAndLockInstance()
4530 def BuildHooksEnv(self):
4533 This runs on master, primary and secondary nodes of the instance.
4536 env = _BuildInstanceHookEnvByObject(self, self.instance)
4537 env["TIMEOUT"] = self.op.timeout
4538 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4541 def CheckPrereq(self):
4542 """Check prerequisites.
4544 This checks that the instance is in the cluster.
4547 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4548 assert self.instance is not None, \
4549 "Cannot retrieve locked instance %s" % self.op.instance_name
4550 _CheckNodeOnline(self, self.instance.primary_node)
4552 def Exec(self, feedback_fn):
4553 """Shutdown the instance.
4556 instance = self.instance
4557 node_current = instance.primary_node
4558 timeout = self.op.timeout
4559 self.cfg.MarkInstanceDown(instance.name)
4560 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4561 msg = result.fail_msg
4563 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4565 _ShutdownInstanceDisks(self, instance)
4568 class LUReinstallInstance(LogicalUnit):
4569 """Reinstall an instance.
4572 HPATH = "instance-reinstall"
4573 HTYPE = constants.HTYPE_INSTANCE
4574 _OP_REQP = ["instance_name"]
4577 ("force_variant", False),
4581 def ExpandNames(self):
4582 self._ExpandAndLockInstance()
4584 def BuildHooksEnv(self):
4587 This runs on master, primary and secondary nodes of the instance.
4590 env = _BuildInstanceHookEnvByObject(self, self.instance)
4591 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4594 def CheckPrereq(self):
4595 """Check prerequisites.
4597 This checks that the instance is in the cluster and is not running.
4600 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4601 assert instance is not None, \
4602 "Cannot retrieve locked instance %s" % self.op.instance_name
4603 _CheckNodeOnline(self, instance.primary_node)
4605 if instance.disk_template == constants.DT_DISKLESS:
4606 raise errors.OpPrereqError("Instance '%s' has no disks" %
4607 self.op.instance_name,
4609 _CheckInstanceDown(self, instance, "cannot reinstall")
4611 if self.op.os_type is not None:
4613 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4614 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4616 self.instance = instance
4618 def Exec(self, feedback_fn):
4619 """Reinstall the instance.
4622 inst = self.instance
4624 if self.op.os_type is not None:
4625 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4626 inst.os = self.op.os_type
4627 self.cfg.Update(inst, feedback_fn)
4629 _StartInstanceDisks(self, inst, None)
4631 feedback_fn("Running the instance OS create scripts...")
4632 # FIXME: pass debug option from opcode to backend
4633 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4634 self.op.debug_level)
4635 result.Raise("Could not install OS for instance %s on node %s" %
4636 (inst.name, inst.primary_node))
4638 _ShutdownInstanceDisks(self, inst)
4641 class LURecreateInstanceDisks(LogicalUnit):
4642 """Recreate an instance's missing disks.
4645 HPATH = "instance-recreate-disks"
4646 HTYPE = constants.HTYPE_INSTANCE
4647 _OP_REQP = ["instance_name", "disks"]
4650 def CheckArguments(self):
4651 """Check the arguments.
4654 if not isinstance(self.op.disks, list):
4655 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4656 for item in self.op.disks:
4657 if (not isinstance(item, int) or
4659 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4660 str(item), errors.ECODE_INVAL)
4662 def ExpandNames(self):
4663 self._ExpandAndLockInstance()
4665 def BuildHooksEnv(self):
4668 This runs on master, primary and secondary nodes of the instance.
4671 env = _BuildInstanceHookEnvByObject(self, self.instance)
4672 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4675 def CheckPrereq(self):
4676 """Check prerequisites.
4678 This checks that the instance is in the cluster and is not running.
4681 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4682 assert instance is not None, \
4683 "Cannot retrieve locked instance %s" % self.op.instance_name
4684 _CheckNodeOnline(self, instance.primary_node)
4686 if instance.disk_template == constants.DT_DISKLESS:
4687 raise errors.OpPrereqError("Instance '%s' has no disks" %
4688 self.op.instance_name, errors.ECODE_INVAL)
4689 _CheckInstanceDown(self, instance, "cannot recreate disks")
4691 if not self.op.disks:
4692 self.op.disks = range(len(instance.disks))
4694 for idx in self.op.disks:
4695 if idx >= len(instance.disks):
4696 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4699 self.instance = instance
4701 def Exec(self, feedback_fn):
4702 """Recreate the disks.
4706 for idx, _ in enumerate(self.instance.disks):
4707 if idx not in self.op.disks: # disk idx has not been passed in
4711 _CreateDisks(self, self.instance, to_skip=to_skip)
4714 class LURenameInstance(LogicalUnit):
4715 """Rename an instance.
4718 HPATH = "instance-rename"
4719 HTYPE = constants.HTYPE_INSTANCE
4720 _OP_REQP = ["instance_name", "new_name"]
4721 _OP_DEFS = [("ignore_ip", False)]
4723 def BuildHooksEnv(self):
4726 This runs on master, primary and secondary nodes of the instance.
4729 env = _BuildInstanceHookEnvByObject(self, self.instance)
4730 env["INSTANCE_NEW_NAME"] = self.op.new_name
4731 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4734 def CheckPrereq(self):
4735 """Check prerequisites.
4737 This checks that the instance is in the cluster and is not running.
4740 self.op.instance_name = _ExpandInstanceName(self.cfg,
4741 self.op.instance_name)
4742 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4743 assert instance is not None
4744 _CheckNodeOnline(self, instance.primary_node)
4745 _CheckInstanceDown(self, instance, "cannot rename")
4746 self.instance = instance
4748 # new name verification
4749 name_info = utils.GetHostInfo(self.op.new_name)
4751 self.op.new_name = new_name = name_info.name
4752 instance_list = self.cfg.GetInstanceList()
4753 if new_name in instance_list:
4754 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4755 new_name, errors.ECODE_EXISTS)
4757 if not self.op.ignore_ip:
4758 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4759 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4760 (name_info.ip, new_name),
4761 errors.ECODE_NOTUNIQUE)
4764 def Exec(self, feedback_fn):
4765 """Rename the instance.
4768 inst = self.instance
4769 old_name = inst.name
4771 if inst.disk_template == constants.DT_FILE:
4772 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4774 self.cfg.RenameInstance(inst.name, self.op.new_name)
4775 # Change the instance lock. This is definitely safe while we hold the BGL
4776 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4777 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4779 # re-read the instance from the configuration after rename
4780 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4782 if inst.disk_template == constants.DT_FILE:
4783 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4784 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4785 old_file_storage_dir,
4786 new_file_storage_dir)
4787 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4788 " (but the instance has been renamed in Ganeti)" %
4789 (inst.primary_node, old_file_storage_dir,
4790 new_file_storage_dir))
4792 _StartInstanceDisks(self, inst, None)
4794 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4795 old_name, self.op.debug_level)
4796 msg = result.fail_msg
4798 msg = ("Could not run OS rename script for instance %s on node %s"
4799 " (but the instance has been renamed in Ganeti): %s" %
4800 (inst.name, inst.primary_node, msg))
4801 self.proc.LogWarning(msg)
4803 _ShutdownInstanceDisks(self, inst)
4806 class LURemoveInstance(LogicalUnit):
4807 """Remove an instance.
4810 HPATH = "instance-remove"
4811 HTYPE = constants.HTYPE_INSTANCE
4812 _OP_REQP = ["instance_name", "ignore_failures"]
4813 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4816 def ExpandNames(self):
4817 self._ExpandAndLockInstance()
4818 self.needed_locks[locking.LEVEL_NODE] = []
4819 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4821 def DeclareLocks(self, level):
4822 if level == locking.LEVEL_NODE:
4823 self._LockInstancesNodes()
4825 def BuildHooksEnv(self):
4828 This runs on master, primary and secondary nodes of the instance.
4831 env = _BuildInstanceHookEnvByObject(self, self.instance)
4832 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4833 nl = [self.cfg.GetMasterNode()]
4834 nl_post = list(self.instance.all_nodes) + nl
4835 return env, nl, nl_post
4837 def CheckPrereq(self):
4838 """Check prerequisites.
4840 This checks that the instance is in the cluster.
4843 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4844 assert self.instance is not None, \
4845 "Cannot retrieve locked instance %s" % self.op.instance_name
4847 def Exec(self, feedback_fn):
4848 """Remove the instance.
4851 instance = self.instance
4852 logging.info("Shutting down instance %s on node %s",
4853 instance.name, instance.primary_node)
4855 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4856 self.op.shutdown_timeout)
4857 msg = result.fail_msg
4859 if self.op.ignore_failures:
4860 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4862 raise errors.OpExecError("Could not shutdown instance %s on"
4864 (instance.name, instance.primary_node, msg))
4866 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4869 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4870 """Utility function to remove an instance.
4873 logging.info("Removing block devices for instance %s", instance.name)
4875 if not _RemoveDisks(lu, instance):
4876 if not ignore_failures:
4877 raise errors.OpExecError("Can't remove instance's disks")
4878 feedback_fn("Warning: can't remove instance's disks")
4880 logging.info("Removing instance %s out of cluster config", instance.name)
4882 lu.cfg.RemoveInstance(instance.name)
4884 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4885 "Instance lock removal conflict"
4887 # Remove lock for the instance
4888 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4891 class LUQueryInstances(NoHooksLU):
4892 """Logical unit for querying instances.
4895 # pylint: disable-msg=W0142
4896 _OP_REQP = ["output_fields", "names", "use_locking"]
4898 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4899 "serial_no", "ctime", "mtime", "uuid"]
4900 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4902 "disk_template", "ip", "mac", "bridge",
4903 "nic_mode", "nic_link",
4904 "sda_size", "sdb_size", "vcpus", "tags",
4905 "network_port", "beparams",
4906 r"(disk)\.(size)/([0-9]+)",
4907 r"(disk)\.(sizes)", "disk_usage",
4908 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4909 r"(nic)\.(bridge)/([0-9]+)",
4910 r"(nic)\.(macs|ips|modes|links|bridges)",
4911 r"(disk|nic)\.(count)",
4913 ] + _SIMPLE_FIELDS +
4915 for name in constants.HVS_PARAMETERS
4916 if name not in constants.HVC_GLOBALS] +
4918 for name in constants.BES_PARAMETERS])
4919 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
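# Clarifying note (added): parameterized fields such as "disk.size/0" or
# "nic.mac/1" are matched by the regular expressions in _FIELDS_STATIC above;
# the trailing number selects the disk or NIC index handled in Exec below.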
4922 def CheckArguments(self):
4923 _CheckOutputFields(static=self._FIELDS_STATIC,
4924 dynamic=self._FIELDS_DYNAMIC,
4925 selected=self.op.output_fields)
4927 def ExpandNames(self):
4928 self.needed_locks = {}
4929 self.share_locks[locking.LEVEL_INSTANCE] = 1
4930 self.share_locks[locking.LEVEL_NODE] = 1
4933 self.wanted = _GetWantedInstances(self, self.op.names)
4935 self.wanted = locking.ALL_SET
4937 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4938 self.do_locking = self.do_node_query and self.op.use_locking
4940 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4941 self.needed_locks[locking.LEVEL_NODE] = []
4942 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4944 def DeclareLocks(self, level):
4945 if level == locking.LEVEL_NODE and self.do_locking:
4946 self._LockInstancesNodes()
4948 def CheckPrereq(self):
4949 """Check prerequisites.
4954 def Exec(self, feedback_fn):
4955 """Computes the list of instances and their attributes.
4958 # pylint: disable-msg=R0912
4959 # way too many branches here
4960 all_info = self.cfg.GetAllInstancesInfo()
4961 if self.wanted == locking.ALL_SET:
4962 # caller didn't specify instance names, so ordering is not important
4964 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4966 instance_names = all_info.keys()
4967 instance_names = utils.NiceSort(instance_names)
4969 # caller did specify names, so we must keep the ordering
4971 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4973 tgt_set = all_info.keys()
4974 missing = set(self.wanted).difference(tgt_set)
4976 raise errors.OpExecError("Some instances were removed before"
4977 " retrieving their data: %s" % missing)
4978 instance_names = self.wanted
4980 instance_list = [all_info[iname] for iname in instance_names]
4982 # begin data gathering
4984 nodes = frozenset([inst.primary_node for inst in instance_list])
4985 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4989 if self.do_node_query:
4991 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4993 result = node_data[name]
4995 # offline nodes will be in both lists
4996 off_nodes.append(name)
4998 bad_nodes.append(name)
5001 live_data.update(result.payload)
5002 # else no instance is alive
5004 live_data = dict([(name, {}) for name in instance_names])
5006 # end data gathering
5011 cluster = self.cfg.GetClusterInfo()
5012 for instance in instance_list:
5014 i_hv = cluster.FillHV(instance, skip_globals=True)
5015 i_be = cluster.FillBE(instance)
5016 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5017 for field in self.op.output_fields:
5018 st_match = self._FIELDS_STATIC.Matches(field)
5019 if field in self._SIMPLE_FIELDS:
5020 val = getattr(instance, field)
5021 elif field == "pnode":
5022 val = instance.primary_node
5023 elif field == "snodes":
5024 val = list(instance.secondary_nodes)
5025 elif field == "admin_state":
5026 val = instance.admin_up
5027 elif field == "oper_state":
5028 if instance.primary_node in bad_nodes:
5031 val = bool(live_data.get(instance.name))
5032 elif field == "status":
5033 if instance.primary_node in off_nodes:
5034 val = "ERROR_nodeoffline"
5035 elif instance.primary_node in bad_nodes:
5036 val = "ERROR_nodedown"
5038 running = bool(live_data.get(instance.name))
5040 if instance.admin_up:
5045 if instance.admin_up:
5049 elif field == "oper_ram":
5050 if instance.primary_node in bad_nodes:
5052 elif instance.name in live_data:
5053 val = live_data[instance.name].get("memory", "?")
5056 elif field == "vcpus":
5057 val = i_be[constants.BE_VCPUS]
5058 elif field == "disk_template":
5059 val = instance.disk_template
5062 val = instance.nics[0].ip
5065 elif field == "nic_mode":
5067 val = i_nicp[0][constants.NIC_MODE]
5070 elif field == "nic_link":
5072 val = i_nicp[0][constants.NIC_LINK]
5075 elif field == "bridge":
5076 if (instance.nics and
5077 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5078 val = i_nicp[0][constants.NIC_LINK]
5081 elif field == "mac":
5083 val = instance.nics[0].mac
5086 elif field == "sda_size" or field == "sdb_size":
5087 idx = ord(field[2]) - ord('a')
5089 val = instance.FindDisk(idx).size
5090 except errors.OpPrereqError:
5092 elif field == "disk_usage": # total disk usage per node
5093 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5094 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5095 elif field == "tags":
5096 val = list(instance.GetTags())
5097 elif field == "hvparams":
5099 elif (field.startswith(HVPREFIX) and
5100 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5101 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5102 val = i_hv.get(field[len(HVPREFIX):], None)
5103 elif field == "beparams":
5105 elif (field.startswith(BEPREFIX) and
5106 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5107 val = i_be.get(field[len(BEPREFIX):], None)
5108 elif st_match and st_match.groups():
5109 # matches a variable list
5110 st_groups = st_match.groups()
5111 if st_groups and st_groups[0] == "disk":
5112 if st_groups[1] == "count":
5113 val = len(instance.disks)
5114 elif st_groups[1] == "sizes":
5115 val = [disk.size for disk in instance.disks]
5116 elif st_groups[1] == "size":
5118 val = instance.FindDisk(st_groups[2]).size
5119 except errors.OpPrereqError:
5122 assert False, "Unhandled disk parameter"
5123 elif st_groups[0] == "nic":
5124 if st_groups[1] == "count":
5125 val = len(instance.nics)
5126 elif st_groups[1] == "macs":
5127 val = [nic.mac for nic in instance.nics]
5128 elif st_groups[1] == "ips":
5129 val = [nic.ip for nic in instance.nics]
5130 elif st_groups[1] == "modes":
5131 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5132 elif st_groups[1] == "links":
5133 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5134 elif st_groups[1] == "bridges":
5137 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5138 val.append(nicp[constants.NIC_LINK])
5143 nic_idx = int(st_groups[2])
5144 if nic_idx >= len(instance.nics):
5147 if st_groups[1] == "mac":
5148 val = instance.nics[nic_idx].mac
5149 elif st_groups[1] == "ip":
5150 val = instance.nics[nic_idx].ip
5151 elif st_groups[1] == "mode":
5152 val = i_nicp[nic_idx][constants.NIC_MODE]
5153 elif st_groups[1] == "link":
5154 val = i_nicp[nic_idx][constants.NIC_LINK]
5155 elif st_groups[1] == "bridge":
5156 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5157 if nic_mode == constants.NIC_MODE_BRIDGED:
5158 val = i_nicp[nic_idx][constants.NIC_LINK]
5162 assert False, "Unhandled NIC parameter"
5164 assert False, ("Declared but unhandled variable parameter '%s'" %
5167 assert False, "Declared but unhandled parameter '%s'" % field
5174 class LUFailoverInstance(LogicalUnit):
5175 """Failover an instance.
5178 HPATH = "instance-failover"
5179 HTYPE = constants.HTYPE_INSTANCE
5180 _OP_REQP = ["instance_name", "ignore_consistency"]
5181 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5184 def ExpandNames(self):
5185 self._ExpandAndLockInstance()
5186 self.needed_locks[locking.LEVEL_NODE] = []
5187 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5189 def DeclareLocks(self, level):
5190 if level == locking.LEVEL_NODE:
5191 self._LockInstancesNodes()
5193 def BuildHooksEnv(self):
5196 This runs on master, primary and secondary nodes of the instance.
5199 instance = self.instance
5200 source_node = instance.primary_node
5201 target_node = instance.secondary_nodes[0]
5203 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5204 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5205 "OLD_PRIMARY": source_node,
5206 "OLD_SECONDARY": target_node,
5207 "NEW_PRIMARY": target_node,
5208 "NEW_SECONDARY": source_node,
5210 env.update(_BuildInstanceHookEnvByObject(self, instance))
5211 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5213 nl_post.append(source_node)
5214 return env, nl, nl_post
5216 def CheckPrereq(self):
5217 """Check prerequisites.
5219 This checks that the instance is in the cluster.
5222 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5223 assert self.instance is not None, \
5224 "Cannot retrieve locked instance %s" % self.op.instance_name
5226 bep = self.cfg.GetClusterInfo().FillBE(instance)
5227 if instance.disk_template not in constants.DTS_NET_MIRROR:
5228 raise errors.OpPrereqError("Instance's disk layout is not"
5229 " network mirrored, cannot failover.",
5232 secondary_nodes = instance.secondary_nodes
5233 if not secondary_nodes:
5234 raise errors.ProgrammerError("no secondary node but using "
5235 "a mirrored disk template")
5237 target_node = secondary_nodes[0]
5238 _CheckNodeOnline(self, target_node)
5239 _CheckNodeNotDrained(self, target_node)
5240 if instance.admin_up:
5241 # check memory requirements on the secondary node
5242 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5243 instance.name, bep[constants.BE_MEMORY],
5244 instance.hypervisor)
5246 self.LogInfo("Not checking memory on the secondary node as"
5247 " instance will not be started")
5249 # check bridge existence
5250 _CheckInstanceBridgesExist(self, instance, node=target_node)
5252 def Exec(self, feedback_fn):
5253 """Failover an instance.
5255 The failover is done by shutting it down on its present node and
5256 starting it on the secondary.
5259 instance = self.instance
5261 source_node = instance.primary_node
5262 target_node = instance.secondary_nodes[0]
5264 if instance.admin_up:
5265 feedback_fn("* checking disk consistency between source and target")
5266 for dev in instance.disks:
5267 # for drbd, these are drbd over lvm
5268 if not _CheckDiskConsistency(self, dev, target_node, False):
5269 if not self.op.ignore_consistency:
5270 raise errors.OpExecError("Disk %s is degraded on target node,"
5271 " aborting failover." % dev.iv_name)
5273 feedback_fn("* not checking disk consistency as instance is not running")
5275 feedback_fn("* shutting down instance on source node")
5276 logging.info("Shutting down instance %s on node %s",
5277 instance.name, source_node)
5279 result = self.rpc.call_instance_shutdown(source_node, instance,
5280 self.op.shutdown_timeout)
5281 msg = result.fail_msg
5283 if self.op.ignore_consistency:
5284 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5285 " Proceeding anyway. Please make sure node"
5286 " %s is down. Error details: %s",
5287 instance.name, source_node, source_node, msg)
5289 raise errors.OpExecError("Could not shutdown instance %s on"
5291 (instance.name, source_node, msg))
5293 feedback_fn("* deactivating the instance's disks on source node")
5294 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5295 raise errors.OpExecError("Can't shut down the instance's disks.")
5297 instance.primary_node = target_node
5298 # distribute new instance config to the other nodes
5299 self.cfg.Update(instance, feedback_fn)
5301 # Only start the instance if it's marked as up
5302 if instance.admin_up:
5303 feedback_fn("* activating the instance's disks on target node")
5304 logging.info("Starting instance %s on node %s",
5305 instance.name, target_node)
5307 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5308 ignore_secondaries=True)
5310 _ShutdownInstanceDisks(self, instance)
5311 raise errors.OpExecError("Can't activate the instance's disks")
5313 feedback_fn("* starting the instance on the target node")
5314 result = self.rpc.call_instance_start(target_node, instance, None, None)
5315 msg = result.fail_msg
5317 _ShutdownInstanceDisks(self, instance)
5318 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5319 (instance.name, target_node, msg))
5322 class LUMigrateInstance(LogicalUnit):
5323 """Migrate an instance.
5325 This is migration without shutting down, as opposed to failover,
5326 which is done with a shutdown.
5329 HPATH = "instance-migrate"
5330 HTYPE = constants.HTYPE_INSTANCE
5331 _OP_REQP = ["instance_name", "live", "cleanup"]
5335 def ExpandNames(self):
5336 self._ExpandAndLockInstance()
5338 self.needed_locks[locking.LEVEL_NODE] = []
5339 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5341 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5342 self.op.live, self.op.cleanup)
5343 self.tasklets = [self._migrater]
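# Note (added): with self.tasklets set, prerequisite checking and execution are
# delegated to the TLMigrateInstance tasklet defined further below; this LU
# only handles locking and the hook environment.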
5345 def DeclareLocks(self, level):
5346 if level == locking.LEVEL_NODE:
5347 self._LockInstancesNodes()
5349 def BuildHooksEnv(self):
5352 This runs on master, primary and secondary nodes of the instance.
5355 instance = self._migrater.instance
5356 source_node = instance.primary_node
5357 target_node = instance.secondary_nodes[0]
5358 env = _BuildInstanceHookEnvByObject(self, instance)
5359 env["MIGRATE_LIVE"] = self.op.live
5360 env["MIGRATE_CLEANUP"] = self.op.cleanup
5362 "OLD_PRIMARY": source_node,
5363 "OLD_SECONDARY": target_node,
5364 "NEW_PRIMARY": target_node,
5365 "NEW_SECONDARY": source_node,
5367 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5369 nl_post.append(source_node)
5370 return env, nl, nl_post
5373 class LUMoveInstance(LogicalUnit):
5374 """Move an instance by data-copying.
5377 HPATH = "instance-move"
5378 HTYPE = constants.HTYPE_INSTANCE
5379 _OP_REQP = ["instance_name", "target_node"]
5380 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5383 def ExpandNames(self):
5384 self._ExpandAndLockInstance()
5385 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5386 self.op.target_node = target_node
5387 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5388 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5390 def DeclareLocks(self, level):
5391 if level == locking.LEVEL_NODE:
5392 self._LockInstancesNodes(primary_only=True)
5394 def BuildHooksEnv(self):
5397 This runs on master, primary and secondary nodes of the instance.
5401 "TARGET_NODE": self.op.target_node,
5402 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5404 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5405 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5406 self.op.target_node]
5409 def CheckPrereq(self):
5410 """Check prerequisites.
5412 This checks that the instance is in the cluster.
5415 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5416 assert self.instance is not None, \
5417 "Cannot retrieve locked instance %s" % self.op.instance_name
5419 node = self.cfg.GetNodeInfo(self.op.target_node)
5420 assert node is not None, \
5421 "Cannot retrieve locked node %s" % self.op.target_node
5423 self.target_node = target_node = node.name
5425 if target_node == instance.primary_node:
5426 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5427 (instance.name, target_node),
5430 bep = self.cfg.GetClusterInfo().FillBE(instance)
5432 for idx, dsk in enumerate(instance.disks):
5433 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5434 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5435 " cannot copy" % idx, errors.ECODE_STATE)
5437 _CheckNodeOnline(self, target_node)
5438 _CheckNodeNotDrained(self, target_node)
5440 if instance.admin_up:
5441 # check memory requirements on the secondary node
5442 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5443 instance.name, bep[constants.BE_MEMORY],
5444 instance.hypervisor)
5446 self.LogInfo("Not checking memory on the secondary node as"
5447 " instance will not be started")
5449 # check bridge existence
5450 _CheckInstanceBridgesExist(self, instance, node=target_node)
5452 def Exec(self, feedback_fn):
5453 """Move an instance.
5455 The move is done by shutting it down on its present node, copying
5456 the data over (slow) and starting it on the new node.
5459 instance = self.instance
5461 source_node = instance.primary_node
5462 target_node = self.target_node
5464 self.LogInfo("Shutting down instance %s on source node %s",
5465 instance.name, source_node)
5467 result = self.rpc.call_instance_shutdown(source_node, instance,
5468 self.op.shutdown_timeout)
5469 msg = result.fail_msg
5471 if self.op.ignore_consistency:
5472 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5473 " Proceeding anyway. Please make sure node"
5474 " %s is down. Error details: %s",
5475 instance.name, source_node, source_node, msg)
5477 raise errors.OpExecError("Could not shutdown instance %s on"
5479 (instance.name, source_node, msg))
5481 # create the target disks
5483 _CreateDisks(self, instance, target_node=target_node)
5484 except errors.OpExecError:
5485 self.LogWarning("Device creation failed, reverting...")
5487 _RemoveDisks(self, instance, target_node=target_node)
5489 self.cfg.ReleaseDRBDMinors(instance.name)
5492 cluster_name = self.cfg.GetClusterInfo().cluster_name
5495 # activate, get path, copy the data over
5496 for idx, disk in enumerate(instance.disks):
5497 self.LogInfo("Copying data for disk %d", idx)
5498 result = self.rpc.call_blockdev_assemble(target_node, disk,
5499 instance.name, True)
5501 self.LogWarning("Can't assemble newly created disk %d: %s",
5502 idx, result.fail_msg)
5503 errs.append(result.fail_msg)
5505 dev_path = result.payload
5506 result = self.rpc.call_blockdev_export(source_node, disk,
5507 target_node, dev_path,
5510 self.LogWarning("Can't copy data over for disk %d: %s",
5511 idx, result.fail_msg)
5512 errs.append(result.fail_msg)
5516 self.LogWarning("Some disks failed to copy, aborting")
5518 _RemoveDisks(self, instance, target_node=target_node)
5520 self.cfg.ReleaseDRBDMinors(instance.name)
5521 raise errors.OpExecError("Errors during disk copy: %s" %
5524 instance.primary_node = target_node
5525 self.cfg.Update(instance, feedback_fn)
5527 self.LogInfo("Removing the disks on the original node")
5528 _RemoveDisks(self, instance, target_node=source_node)
5530 # Only start the instance if it's marked as up
5531 if instance.admin_up:
5532 self.LogInfo("Starting instance %s on node %s",
5533 instance.name, target_node)
5535 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5536 ignore_secondaries=True)
5538 _ShutdownInstanceDisks(self, instance)
5539 raise errors.OpExecError("Can't activate the instance's disks")
5541 result = self.rpc.call_instance_start(target_node, instance, None, None)
5542 msg = result.fail_msg
5544 _ShutdownInstanceDisks(self, instance)
5545 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5546 (instance.name, target_node, msg))
5549 class LUMigrateNode(LogicalUnit):
5550 """Migrate all instances from a node.
5553 HPATH = "node-migrate"
5554 HTYPE = constants.HTYPE_NODE
5555 _OP_REQP = ["node_name", "live"]
5558 def ExpandNames(self):
5559 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5561 self.needed_locks = {
5562 locking.LEVEL_NODE: [self.op.node_name],
5565 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5567 # Create tasklets for migrating instances for all instances on this node
5571 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5572 logging.debug("Migrating instance %s", inst.name)
5573 names.append(inst.name)
5575 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5577 self.tasklets = tasklets
5579 # Declare instance locks
5580 self.needed_locks[locking.LEVEL_INSTANCE] = names
5582 def DeclareLocks(self, level):
5583 if level == locking.LEVEL_NODE:
5584 self._LockInstancesNodes()
5586 def BuildHooksEnv(self):
5589 This runs on the master, the primary and all the secondaries.
5593 "NODE_NAME": self.op.node_name,
5596 nl = [self.cfg.GetMasterNode()]
5598 return (env, nl, nl)
5601 class TLMigrateInstance(Tasklet):
5602 def __init__(self, lu, instance_name, live, cleanup):
5603 """Initializes this class.
5606 Tasklet.__init__(self, lu)
5609 self.instance_name = instance_name
5611 self.cleanup = cleanup
5613 def CheckPrereq(self):
5614 """Check prerequisites.
5616 This checks that the instance is in the cluster.
5619 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5620 instance = self.cfg.GetInstanceInfo(instance_name)
5621 assert instance is not None
5623 if instance.disk_template != constants.DT_DRBD8:
5624 raise errors.OpPrereqError("Instance's disk layout is not"
5625 " drbd8, cannot migrate.", errors.ECODE_STATE)
5627 secondary_nodes = instance.secondary_nodes
5628 if not secondary_nodes:
5629 raise errors.ConfigurationError("No secondary node but using"
5630 " drbd8 disk template")
5632 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5634 target_node = secondary_nodes[0]
5635 # check memory requirements on the secondary node
5636 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5637 instance.name, i_be[constants.BE_MEMORY],
5638 instance.hypervisor)
5640 # check bridge existence
5641 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5643 if not self.cleanup:
5644 _CheckNodeNotDrained(self.lu, target_node)
5645 result = self.rpc.call_instance_migratable(instance.primary_node,
5647 result.Raise("Can't migrate, please use failover",
5648 prereq=True, ecode=errors.ECODE_STATE)
5650 self.instance = instance
5652 def _WaitUntilSync(self):
5653 """Poll with custom rpc for disk sync.
5655 This uses our own step-based rpc call.
5658 self.feedback_fn("* wait until resync is done")
5662 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5664 self.instance.disks)
5666 for node, nres in result.items():
5667 nres.Raise("Cannot resync disks on node %s" % node)
5668 node_done, node_percent = nres.payload
5669 all_done = all_done and node_done
5670 if node_percent is not None:
5671 min_percent = min(min_percent, node_percent)
5673 if min_percent < 100:
5674 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5677 def _EnsureSecondary(self, node):
5678 """Demote a node to secondary.
5681 self.feedback_fn("* switching node %s to secondary mode" % node)
5683 for dev in self.instance.disks:
5684 self.cfg.SetDiskID(dev, node)
5686 result = self.rpc.call_blockdev_close(node, self.instance.name,
5687 self.instance.disks)
5688 result.Raise("Cannot change disk to secondary on node %s" % node)
5690 def _GoStandalone(self):
5691 """Disconnect from the network.
5694 self.feedback_fn("* changing into standalone mode")
5695 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5696 self.instance.disks)
5697 for node, nres in result.items():
5698 nres.Raise("Cannot disconnect disks node %s" % node)
5700 def _GoReconnect(self, multimaster):
5701 """Reconnect to the network.
5707 msg = "single-master"
5708 self.feedback_fn("* changing disks into %s mode" % msg)
5709 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5710 self.instance.disks,
5711 self.instance.name, multimaster)
5712 for node, nres in result.items():
5713 nres.Raise("Cannot change disks config on node %s" % node)
5715 def _ExecCleanup(self):
5716 """Try to clean up after a failed migration.
5718 The cleanup is done by:
5719 - check that the instance is running only on one node
5720 (and update the config if needed)
5721 - change disks on its secondary node to secondary
5722 - wait until disks are fully synchronized
5723 - disconnect from the network
5724 - change disks into single-master mode
5725 - wait again until disks are fully synchronized
5728 instance = self.instance
5729 target_node = self.target_node
5730 source_node = self.source_node
5732 # check running on only one node
5733 self.feedback_fn("* checking where the instance actually runs"
5734 " (if this hangs, the hypervisor might be in"
5736 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5737 for node, result in ins_l.items():
5738 result.Raise("Can't contact node %s" % node)
5740 runningon_source = instance.name in ins_l[source_node].payload
5741 runningon_target = instance.name in ins_l[target_node].payload
5743 if runningon_source and runningon_target:
5744 raise errors.OpExecError("Instance seems to be running on two nodes,"
5745 " or the hypervisor is confused. You will have"
5746 " to ensure manually that it runs only on one"
5747 " and restart this operation.")
5749 if not (runningon_source or runningon_target):
5750 raise errors.OpExecError("Instance does not seem to be running at all."
5751 " In this case, it's safer to repair by"
5752 " running 'gnt-instance stop' to ensure disk"
5753 " shutdown, and then restarting it.")
5755 if runningon_target:
5756 # the migration has actually succeeded, we need to update the config
5757 self.feedback_fn("* instance running on secondary node (%s),"
5758 " updating config" % target_node)
5759 instance.primary_node = target_node
5760 self.cfg.Update(instance, self.feedback_fn)
5761 demoted_node = source_node
5763 self.feedback_fn("* instance confirmed to be running on its"
5764 " primary node (%s)" % source_node)
5765 demoted_node = target_node
5767 self._EnsureSecondary(demoted_node)
5769 self._WaitUntilSync()
5770 except errors.OpExecError:
5771 # we ignore here errors, since if the device is standalone, it
5772 # won't be able to sync
5774 self._GoStandalone()
5775 self._GoReconnect(False)
5776 self._WaitUntilSync()
5778 self.feedback_fn("* done")
5780 def _RevertDiskStatus(self):
5781 """Try to revert the disk status after a failed migration.
5784 target_node = self.target_node
5786 self._EnsureSecondary(target_node)
5787 self._GoStandalone()
5788 self._GoReconnect(False)
5789 self._WaitUntilSync()
5790 except errors.OpExecError, err:
5791 self.lu.LogWarning("Migration failed and I can't reconnect the"
5792 " drives: error '%s'\n"
5793 "Please look and recover the instance status" %
5796 def _AbortMigration(self):
5797 """Call the hypervisor code to abort a started migration.
5800 instance = self.instance
5801 target_node = self.target_node
5802 migration_info = self.migration_info
5804 abort_result = self.rpc.call_finalize_migration(target_node,
5808 abort_msg = abort_result.fail_msg
5810 logging.error("Aborting migration failed on target node %s: %s",
5811 target_node, abort_msg)
5812 # Don't raise an exception here, as we still have to try to revert the
5813 # disk status, even if this step failed.
5815 def _ExecMigration(self):
5816 """Migrate an instance.
5818 The migrate is done by:
5819 - change the disks into dual-master mode
5820 - wait until disks are fully synchronized again
5821 - migrate the instance
5822 - change disks on the new secondary node (the old primary) to secondary
5823 - wait until disks are fully synchronized
5824 - change disks into single-master mode
5827 instance = self.instance
5828 target_node = self.target_node
5829 source_node = self.source_node
5831 self.feedback_fn("* checking disk consistency between source and target")
5832 for dev in instance.disks:
5833 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5834 raise errors.OpExecError("Disk %s is degraded or not fully"
5835 " synchronized on target node,"
5836 " aborting migration." % dev.iv_name)
5838 # First get the migration information from the remote node
5839 result = self.rpc.call_migration_info(source_node, instance)
5840 msg = result.fail_msg
5842 log_err = ("Failed fetching source migration information from %s: %s" %
5844 logging.error(log_err)
5845 raise errors.OpExecError(log_err)
5847 self.migration_info = migration_info = result.payload
5849 # Then switch the disks to master/master mode
5850 self._EnsureSecondary(target_node)
5851 self._GoStandalone()
5852 self._GoReconnect(True)
5853 self._WaitUntilSync()
5855 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5856 result = self.rpc.call_accept_instance(target_node,
5859 self.nodes_ip[target_node])
5861 msg = result.fail_msg
5863 logging.error("Instance pre-migration failed, trying to revert"
5864 " disk status: %s", msg)
5865 self.feedback_fn("Pre-migration failed, aborting")
5866 self._AbortMigration()
5867 self._RevertDiskStatus()
5868 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5869 (instance.name, msg))
5871 self.feedback_fn("* migrating instance to %s" % target_node)
5873 result = self.rpc.call_instance_migrate(source_node, instance,
5874 self.nodes_ip[target_node],
5876 msg = result.fail_msg
5878 logging.error("Instance migration failed, trying to revert"
5879 " disk status: %s", msg)
5880 self.feedback_fn("Migration failed, aborting")
5881 self._AbortMigration()
5882 self._RevertDiskStatus()
5883 raise errors.OpExecError("Could not migrate instance %s: %s" %
5884 (instance.name, msg))
5887 instance.primary_node = target_node
5888 # distribute new instance config to the other nodes
5889 self.cfg.Update(instance, self.feedback_fn)
5891 result = self.rpc.call_finalize_migration(target_node,
5895 msg = result.fail_msg
5897 logging.error("Instance migration succeeded, but finalization failed:"
5899 raise errors.OpExecError("Could not finalize instance migration: %s" %
5902 self._EnsureSecondary(source_node)
5903 self._WaitUntilSync()
5904 self._GoStandalone()
5905 self._GoReconnect(False)
5906 self._WaitUntilSync()
5908 self.feedback_fn("* done")
5910 def Exec(self, feedback_fn):
5911 """Perform the migration.
5914 feedback_fn("Migrating instance %s" % self.instance.name)
5916 self.feedback_fn = feedback_fn
5918 self.source_node = self.instance.primary_node
5919 self.target_node = self.instance.secondary_nodes[0]
5920 self.all_nodes = [self.source_node, self.target_node]
5922 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5923 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5927 return self._ExecCleanup()
5929 return self._ExecMigration()
5932 def _CreateBlockDev(lu, node, instance, device, force_create,
5934 """Create a tree of block devices on a given node.
5936 If this device type has to be created on secondaries, create it and
5939 If not, just recurse to children keeping the same 'force' value.
5941 @param lu: the lu on whose behalf we execute
5942 @param node: the node on which to create the device
5943 @type instance: L{objects.Instance}
5944 @param instance: the instance which owns the device
5945 @type device: L{objects.Disk}
5946 @param device: the device to create
5947 @type force_create: boolean
5948 @param force_create: whether to force creation of this device; this
5949 will be changed to True whenever we find a device which has
5950 the CreateOnSecondary() attribute
5951 @param info: the extra 'metadata' we should attach to the device
5952 (this will be represented as a LVM tag)
5953 @type force_open: boolean
5954 @param force_open: this parameter will be passed to the
5955 L{backend.BlockdevCreate} function where it specifies
5956 whether we run on primary or not, and it affects both
5957 the child assembly and the device's own Open() execution
5960 if device.CreateOnSecondary():
5964 for child in device.children:
5965 _CreateBlockDev(lu, node, instance, child, force_create,
5968 if not force_create:
5971 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5974 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5975 """Create a single block device on a given node.
5977 This will not recurse over children of the device, so they must be
5980 @param lu: the lu on whose behalf we execute
5981 @param node: the node on which to create the device
5982 @type instance: L{objects.Instance}
5983 @param instance: the instance which owns the device
5984 @type device: L{objects.Disk}
5985 @param device: the device to create
5986 @param info: the extra 'metadata' we should attach to the device
5987 (this will be represented as a LVM tag)
5988 @type force_open: boolean
5989 @param force_open: this parameter will be passed to the
5990 L{backend.BlockdevCreate} function where it specifies
5991 whether we run on primary or not, and it affects both
5992 the child assembly and the device's own Open() execution
5995 lu.cfg.SetDiskID(device, node)
5996 result = lu.rpc.call_blockdev_create(node, device, device.size,
5997 instance.name, force_open, info)
5998 result.Raise("Can't create block device %s on"
5999 " node %s for instance %s" % (device, node, instance.name))
6000 if device.physical_id is None:
6001 device.physical_id = result.payload
6004 def _GenerateUniqueNames(lu, exts):
6005 """Generate a suitable LV name.
6007 This will generate a logical volume name for the given instance.
6012 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6013 results.append("%s%s" % (new_id, val))
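# Illustrative sketch (hypothetical IDs, added commentary): for
# exts == [".disk0", ".disk1"] this would return names along the lines of
#   ["f4e06e0f-....disk0", "8f3a1c2b-....disk1"]
# i.e. a cluster-unique ID from GenerateUniqueID() followed by the requested
# suffix; the DRBD8 template later appends "_data"/"_meta" to such names.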
6017 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6019 """Generate a drbd8 device complete with its children.
6022 port = lu.cfg.AllocatePort()
6023 vgname = lu.cfg.GetVGName()
6024 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6025 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6026 logical_id=(vgname, names[0]))
6027 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6028 logical_id=(vgname, names[1]))
6029 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6030 logical_id=(primary, secondary, port,
6033 children=[dev_data, dev_meta],
6038 def _GenerateDiskTemplate(lu, template_name,
6039 instance_name, primary_node,
6040 secondary_nodes, disk_info,
6041 file_storage_dir, file_driver,
6043 """Generate the entire disk layout for a given template type.
6046 #TODO: compute space requirements
6048 vgname = lu.cfg.GetVGName()
6049 disk_count = len(disk_info)
6051 if template_name == constants.DT_DISKLESS:
6053 elif template_name == constants.DT_PLAIN:
6054 if len(secondary_nodes) != 0:
6055 raise errors.ProgrammerError("Wrong template configuration")
6057 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6058 for i in range(disk_count)])
6059 for idx, disk in enumerate(disk_info):
6060 disk_index = idx + base_index
6061 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6062 logical_id=(vgname, names[idx]),
6063 iv_name="disk/%d" % disk_index,
6065 disks.append(disk_dev)
6066 elif template_name == constants.DT_DRBD8:
6067 if len(secondary_nodes) != 1:
6068 raise errors.ProgrammerError("Wrong template configuration")
6069 remote_node = secondary_nodes[0]
6070 minors = lu.cfg.AllocateDRBDMinor(
6071 [primary_node, remote_node] * len(disk_info), instance_name)
6074 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6075 for i in range(disk_count)]):
6076 names.append(lv_prefix + "_data")
6077 names.append(lv_prefix + "_meta")
6078 for idx, disk in enumerate(disk_info):
6079 disk_index = idx + base_index
6080 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6081 disk["size"], names[idx*2:idx*2+2],
6082 "disk/%d" % disk_index,
6083 minors[idx*2], minors[idx*2+1])
6084 disk_dev.mode = disk["mode"]
6085 disks.append(disk_dev)
6086 elif template_name == constants.DT_FILE:
6087 if len(secondary_nodes) != 0:
6088 raise errors.ProgrammerError("Wrong template configuration")
6090 _RequireFileStorage()
6092 for idx, disk in enumerate(disk_info):
6093 disk_index = idx + base_index
6094 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6095 iv_name="disk/%d" % disk_index,
6096 logical_id=(file_driver,
6097 "%s/disk%d" % (file_storage_dir,
6100 disks.append(disk_dev)
6102 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
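# Illustrative example (assumed sizes, added commentary): for
# template_name == constants.DT_PLAIN, base_index 0 and two disks of 512 MB,
# the result would be two LD_LV Disk objects in the cluster volume group,
# with LV names generated via _GenerateUniqueNames([".disk0", ".disk1"]) and
# iv_names "disk/0" and "disk/1".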
6106 def _GetInstanceInfoText(instance):
6107 """Compute the text that should be added to the disk's metadata.
6110 return "originstname+%s" % instance.name
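# For example (hypothetical instance name): an instance called
# "web1.example.com" gets "originstname+web1.example.com" attached to its
# disks as an LVM tag, so a volume can be traced back to its owning instance.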
6113 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6114 """Create all disks for an instance.
6116 This abstracts away some work from AddInstance.
6118 @type lu: L{LogicalUnit}
6119 @param lu: the logical unit on whose behalf we execute
6120 @type instance: L{objects.Instance}
6121 @param instance: the instance whose disks we should create
6123 @param to_skip: list of indices to skip
6124 @type target_node: string
6125 @param target_node: if passed, overrides the target node for creation
6127 @return: the success of the creation
6130 info = _GetInstanceInfoText(instance)
6131 if target_node is None:
6132 pnode = instance.primary_node
6133 all_nodes = instance.all_nodes
6138 if instance.disk_template == constants.DT_FILE:
6139 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6140 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6142 result.Raise("Failed to create directory '%s' on"
6143 " node %s" % (file_storage_dir, pnode))
6145 # Note: this needs to be kept in sync with adding of disks in
6146 # LUSetInstanceParams
6147 for idx, device in enumerate(instance.disks):
6148 if to_skip and idx in to_skip:
6150 logging.info("Creating volume %s for instance %s",
6151 device.iv_name, instance.name)
6153 for node in all_nodes:
6154 f_create = node == pnode
6155 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
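# Added commentary on the loop above: f_create is True only on the primary
# node, so on secondary nodes only device types reporting CreateOnSecondary()
# (and their children) are created, and force_open follows the same
# primary/secondary distinction.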
6158 def _RemoveDisks(lu, instance, target_node=None):
6159 """Remove all disks for an instance.
6161 This abstracts away some work from `AddInstance()` and
6162 `RemoveInstance()`. Note that in case some of the devices couldn't
6163 be removed, the removal will continue with the other ones (compare
6164 with `_CreateDisks()`).
6166 @type lu: L{LogicalUnit}
6167 @param lu: the logical unit on whose behalf we execute
6168 @type instance: L{objects.Instance}
6169 @param instance: the instance whose disks we should remove
6170 @type target_node: string
6171 @param target_node: used to override the node on which to remove the disks
6173 @return: the success of the removal
6176 logging.info("Removing block devices for instance %s", instance.name)
6179 for device in instance.disks:
6181 edata = [(target_node, device)]
6183 edata = device.ComputeNodeTree(instance.primary_node)
6184 for node, disk in edata:
6185 lu.cfg.SetDiskID(disk, node)
6186 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6188 lu.LogWarning("Could not remove block device %s on node %s,"
6189 " continuing anyway: %s", device.iv_name, node, msg)
6192 if instance.disk_template == constants.DT_FILE:
6193 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6197 tgt = instance.primary_node
6198 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6200 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6201 file_storage_dir, instance.primary_node, result.fail_msg)
6207 def _ComputeDiskSize(disk_template, disks):
6208 """Compute disk size requirements in the volume group
6211 # Required free disk space as a function of disk and swap space
6213 constants.DT_DISKLESS: None,
6214 constants.DT_PLAIN: sum(d["size"] for d in disks),
6215 # 128 MB are added for drbd metadata for each disk
6216 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6217 constants.DT_FILE: None,
6220 if disk_template not in req_size_dict:
6221 raise errors.ProgrammerError("Disk template '%s' size requirement"
6222 " is unknown" % disk_template)
6224 return req_size_dict[disk_template]
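# Worked example (hypothetical sizes): for a DRBD8 instance with two disks of
# 1024 MB and 2048 MB,
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 2048}])
# returns (1024 + 128) + (2048 + 128) = 3328 MB, the extra 128 MB per disk
# covering the DRBD metadata volume; diskless and file-based templates return
# None since they need no space in the volume group.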
6227 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6228 """Hypervisor parameter validation.
6230 This function abstracts the hypervisor parameter validation to be
6231 used in both instance create and instance modify.
6233 @type lu: L{LogicalUnit}
6234 @param lu: the logical unit for which we check
6235 @type nodenames: list
6236 @param nodenames: the list of nodes on which we should check
6237 @type hvname: string
6238 @param hvname: the name of the hypervisor we should use
6239 @type hvparams: dict
6240 @param hvparams: the parameters which we need to check
6241 @raise errors.OpPrereqError: if the parameters are not valid
6244 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6247 for node in nodenames:
6251 info.Raise("Hypervisor parameter validation failed on node %s" % node)
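# Typical usage (sketch, mirroring the call in LUCreateInstance.CheckPrereq):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# i.e. the parameters are syntax-checked locally first and then validated
# remotely on every node the instance will use.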
6254 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6255 """OS parameters validation.
6257 @type lu: L{LogicalUnit}
6258 @param lu: the logical unit for which we check
6259 @type required: boolean
6260 @param required: whether the validation should fail if the OS is not
6262 @type nodenames: list
6263 @param nodenames: the list of nodes on which we should check
6264 @type osname: string
6265 @param osname: the name of the OS we should use
6266 @type osparams: dict
6267 @param osparams: the parameters which we need to check
6268 @raise errors.OpPrereqError: if the parameters are not valid
6271 result = lu.rpc.call_os_validate(required, nodenames, osname,
6272 [constants.OS_VALIDATE_PARAMETERS],
6274 for node, nres in result.items():
6275 # we don't check for offline cases since this should be run only
6276 # against the master node and/or an instance's nodes
6277 nres.Raise("OS Parameters validation failed on node %s" % node)
6278 if not nres.payload:
6279 lu.LogInfo("OS %s not found on node %s, validation skipped",
6283 class LUCreateInstance(LogicalUnit):
6284 """Create an instance.
6287 HPATH = "instance-add"
6288 HTYPE = constants.HTYPE_INSTANCE
6289 _OP_REQP = ["instance_name", "disks",
6291 "wait_for_sync", "ip_check", "nics",
6292 "hvparams", "beparams", "osparams"]
6294 ("name_check", True),
6295 ("no_install", False),
6297 ("force_variant", False),
6298 ("source_handshake", None),
6299 ("source_x509_ca", None),
6300 ("source_instance_name", None),
6305 ("iallocator", None),
6306 ("hypervisor", None),
6307 ("disk_template", None),
6308 ("identify_defaults", None),
6312 def CheckArguments(self):
6316 # do not require name_check to ease forward/backward compatibility
6318 if self.op.no_install and self.op.start:
6319 self.LogInfo("No-installation mode selected, disabling startup")
6320 self.op.start = False
6321 # validate/normalize the instance name
6322 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6323 if self.op.ip_check and not self.op.name_check:
6324 # TODO: make the ip check more flexible and not depend on the name check
6325 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6328 # check nics' parameter names
6329 for nic in self.op.nics:
6330 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6332 # check disks. parameter names and consistent adopt/no-adopt strategy
6333 has_adopt = has_no_adopt = False
6334 for disk in self.op.disks:
6335 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6340 if has_adopt and has_no_adopt:
6341 raise errors.OpPrereqError("Either all disks are adopted or none is",
6344 if self.op.disk_template != constants.DT_PLAIN:
6345 raise errors.OpPrereqError("Disk adoption is only supported for the"
6346 " 'plain' disk template",
6348 if self.op.iallocator is not None:
6349 raise errors.OpPrereqError("Disk adoption not allowed with an"
6350 " iallocator script", errors.ECODE_INVAL)
6351 if self.op.mode == constants.INSTANCE_IMPORT:
6352 raise errors.OpPrereqError("Disk adoption not allowed for"
6353 " instance import", errors.ECODE_INVAL)
6355 self.adopt_disks = has_adopt
6357 # verify creation mode
6358 if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6359 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6360 self.op.mode, errors.ECODE_INVAL)
6362 # instance name verification
6363 if self.op.name_check:
6364 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6365 self.op.instance_name = self.hostname1.name
6366 # used in CheckPrereq for ip ping check
6367 self.check_ip = self.hostname1.ip
6368 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6369 raise errors.OpPrereqError("Remote imports require names to be checked" %
6372 self.check_ip = None
6374 # file storage checks
6375 if (self.op.file_driver and
6376 not self.op.file_driver in constants.FILE_DRIVER):
6377 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6378 self.op.file_driver, errors.ECODE_INVAL)
6380 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6381 raise errors.OpPrereqError("File storage directory path not absolute",
6384 ### Node/iallocator related checks
6385 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6386 raise errors.OpPrereqError("One and only one of iallocator and primary"
6387 " node must be given",
6390 self._cds = _GetClusterDomainSecret()
6392 if self.op.mode == constants.INSTANCE_IMPORT:
6393 # On import force_variant must be True, because if we forced it at
6394 # initial install, our only chance when importing it back is that it
6396 self.op.force_variant = True
6398 if self.op.no_install:
6399 self.LogInfo("No-installation mode has no effect during import")
6401 elif self.op.mode == constants.INSTANCE_CREATE:
6402 if self.op.os_type is None:
6403 raise errors.OpPrereqError("No guest OS specified",
6405 if self.op.disk_template is None:
6406 raise errors.OpPrereqError("No disk template specified",
6409 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6410 # Check handshake to ensure both clusters have the same domain secret
6411 src_handshake = self.op.source_handshake
6412 if not src_handshake:
6413 raise errors.OpPrereqError("Missing source handshake",
6416 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6419 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6422 # Load and check source CA
6423 self.source_x509_ca_pem = self.op.source_x509_ca
6424 if not self.source_x509_ca_pem:
6425 raise errors.OpPrereqError("Missing source X509 CA",
6429 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6431 except OpenSSL.crypto.Error, err:
6432 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6433 (err, ), errors.ECODE_INVAL)
6435 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6436 if errcode is not None:
6437 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6440 self.source_x509_ca = cert
6442 src_instance_name = self.op.source_instance_name
6443 if not src_instance_name:
6444 raise errors.OpPrereqError("Missing source instance name",
6447 self.source_instance_name = \
6448 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6451 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6452 self.op.mode, errors.ECODE_INVAL)
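# Illustrative example (hypothetical values, added commentary): a disk
# adoption request would pass disks such as
#   [{"size": 10240, "adopt": "existing-lv-name"}]
# and, per the checks above, is only accepted with the 'plain' disk template,
# without an iallocator and not in import mode.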
6454 def ExpandNames(self):
6455 """ExpandNames for CreateInstance.
6457 Figure out the right locks for instance creation.
6460 self.needed_locks = {}
6462 instance_name = self.op.instance_name
6463 # this is just a preventive check, but someone might still add this
6464 # instance in the meantime, and creation will fail at lock-add time
6465 if instance_name in self.cfg.GetInstanceList():
6466 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6467 instance_name, errors.ECODE_EXISTS)
6469 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6471 if self.op.iallocator:
6472 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6474 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6475 nodelist = [self.op.pnode]
6476 if self.op.snode is not None:
6477 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6478 nodelist.append(self.op.snode)
6479 self.needed_locks[locking.LEVEL_NODE] = nodelist
6481 # in case of import lock the source node too
6482 if self.op.mode == constants.INSTANCE_IMPORT:
6483 src_node = self.op.src_node
6484 src_path = self.op.src_path
6486 if src_path is None:
6487 self.op.src_path = src_path = self.op.instance_name
6489 if src_node is None:
6490 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6491 self.op.src_node = None
6492 if os.path.isabs(src_path):
6493 raise errors.OpPrereqError("Importing an instance from an absolute"
6494 " path requires a source node option.",
6497 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6498 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6499 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6500 if not os.path.isabs(src_path):
6501 self.op.src_path = src_path = \
6502 utils.PathJoin(constants.EXPORT_DIR, src_path)
6504 def _RunAllocator(self):
6505 """Run the allocator based on input opcode.
6508 nics = [n.ToDict() for n in self.nics]
6509 ial = IAllocator(self.cfg, self.rpc,
6510 mode=constants.IALLOCATOR_MODE_ALLOC,
6511 name=self.op.instance_name,
6512 disk_template=self.op.disk_template,
6515 vcpus=self.be_full[constants.BE_VCPUS],
6516 mem_size=self.be_full[constants.BE_MEMORY],
6519 hypervisor=self.op.hypervisor,
6522 ial.Run(self.op.iallocator)
6525 raise errors.OpPrereqError("Can't compute nodes using"
6526 " iallocator '%s': %s" %
6527 (self.op.iallocator, ial.info),
6529 if len(ial.result) != ial.required_nodes:
6530 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6531 " of nodes (%s), required %s" %
6532 (self.op.iallocator, len(ial.result),
6533 ial.required_nodes), errors.ECODE_FAULT)
6534 self.op.pnode = ial.result[0]
6535 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6536 self.op.instance_name, self.op.iallocator,
6537 utils.CommaJoin(ial.result))
6538 if ial.required_nodes == 2:
6539 self.op.snode = ial.result[1]
6541 def BuildHooksEnv(self):
6544 This runs on master, primary and secondary nodes of the instance.
6548 "ADD_MODE": self.op.mode,
6550 if self.op.mode == constants.INSTANCE_IMPORT:
6551 env["SRC_NODE"] = self.op.src_node
6552 env["SRC_PATH"] = self.op.src_path
6553 env["SRC_IMAGES"] = self.src_images
6555 env.update(_BuildInstanceHookEnv(
6556 name=self.op.instance_name,
6557 primary_node=self.op.pnode,
6558 secondary_nodes=self.secondaries,
6559 status=self.op.start,
6560 os_type=self.op.os_type,
6561 memory=self.be_full[constants.BE_MEMORY],
6562 vcpus=self.be_full[constants.BE_VCPUS],
6563 nics=_NICListToTuple(self, self.nics),
6564 disk_template=self.op.disk_template,
6565 disks=[(d["size"], d["mode"]) for d in self.disks],
6568 hypervisor_name=self.op.hypervisor,
6571 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6575 def _ReadExportInfo(self):
6576 """Reads the export information from disk.
6578 It will override the opcode source node and path with the actual
6579 information, if these two were not specified before.
6581 @return: the export information
6584 assert self.op.mode == constants.INSTANCE_IMPORT
6586 src_node = self.op.src_node
6587 src_path = self.op.src_path
6589 if src_node is None:
6590 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6591 exp_list = self.rpc.call_export_list(locked_nodes)
6593 for node in exp_list:
6594 if exp_list[node].fail_msg:
6596 if src_path in exp_list[node].payload:
6598 self.op.src_node = src_node = node
6599 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6603 raise errors.OpPrereqError("No export found for relative path %s" %
6604 src_path, errors.ECODE_INVAL)
6606 _CheckNodeOnline(self, src_node)
6607 result = self.rpc.call_export_info(src_node, src_path)
6608 result.Raise("No export or invalid export found in dir %s" % src_path)
6610 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6611 if not export_info.has_section(constants.INISECT_EXP):
6612 raise errors.ProgrammerError("Corrupted export config",
6613 errors.ECODE_ENVIRON)
6615 ei_version = export_info.get(constants.INISECT_EXP, "version")
6616 if (int(ei_version) != constants.EXPORT_VERSION):
6617 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6618 (ei_version, constants.EXPORT_VERSION),
6619 errors.ECODE_ENVIRON)
6622 def _ReadExportParams(self, einfo):
6623 """Use export parameters as defaults.
6625 If the opcode doesn't specify (i.e. override) some instance
6626 parameters, try to take them from the export information, if
6630 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6632 if self.op.disk_template is None:
6633 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6634 self.op.disk_template = einfo.get(constants.INISECT_INS,
6637 raise errors.OpPrereqError("No disk template specified and the export"
6638 " is missing the disk_template information",
6641 if not self.op.disks:
6642 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6644 # TODO: import the disk iv_name too
6645 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6646 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6647 disks.append({"size": disk_sz})
6648 self.op.disks = disks
6650 raise errors.OpPrereqError("No disk info specified and the export"
6651 " is missing the disk information",
6654 if (not self.op.nics and
6655 einfo.has_option(constants.INISECT_INS, "nic_count")):
6657 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6659 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6660 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6665 if (self.op.hypervisor is None and
6666 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6667 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6668 if einfo.has_section(constants.INISECT_HYP):
6669 # use the export parameters but do not override the ones
6670 # specified by the user
6671 for name, value in einfo.items(constants.INISECT_HYP):
6672 if name not in self.op.hvparams:
6673 self.op.hvparams[name] = value
6675 if einfo.has_section(constants.INISECT_BEP):
6676 # use the parameters, without overriding
6677 for name, value in einfo.items(constants.INISECT_BEP):
6678 if name not in self.op.beparams:
6679 self.op.beparams[name] = value
6681 # try to read the parameters old style, from the main section
6682 for name in constants.BES_PARAMETERS:
6683 if (name not in self.op.beparams and
6684 einfo.has_option(constants.INISECT_INS, name)):
6685 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6687 if einfo.has_section(constants.INISECT_OSP):
6688 # use the parameters, without overriding
6689 for name, value in einfo.items(constants.INISECT_OSP):
6690 if name not in self.op.osparams:
6691 self.op.osparams[name] = value
6693 def _RevertToDefaults(self, cluster):
6694 """Revert the instance parameters to the default values.
6698 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6699 for name in self.op.hvparams.keys():
6700 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6701 del self.op.hvparams[name]
6703 be_defs = cluster.SimpleFillBE({})
6704 for name in self.op.beparams.keys():
6705 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6706 del self.op.beparams[name]
6708 nic_defs = cluster.SimpleFillNIC({})
6709 for nic in self.op.nics:
6710 for name in constants.NICS_PARAMETERS:
6711 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6714 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6715 for name in self.op.osparams.keys():
6716 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6717 del self.op.osparams[name]
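# Illustrative example (assumed parameter, added commentary): if the opcode
# specifies a backend parameter, say memory=512, and the cluster default is
# also 512, the entry is dropped from self.op.beparams above so the new
# instance keeps tracking the cluster default instead of pinning the value.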
6719 def CheckPrereq(self):
6720 """Check prerequisites.
6723 if self.op.mode == constants.INSTANCE_IMPORT:
6724 export_info = self._ReadExportInfo()
6725 self._ReadExportParams(export_info)
6727 _CheckDiskTemplate(self.op.disk_template)
6729 if (not self.cfg.GetVGName() and
6730 self.op.disk_template not in constants.DTS_NOT_LVM):
6731 raise errors.OpPrereqError("Cluster does not support lvm-based"
6732 " instances", errors.ECODE_STATE)
6734 if self.op.hypervisor is None:
6735 self.op.hypervisor = self.cfg.GetHypervisorType()
6737 cluster = self.cfg.GetClusterInfo()
6738 enabled_hvs = cluster.enabled_hypervisors
6739 if self.op.hypervisor not in enabled_hvs:
6740 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6741 " cluster (%s)" % (self.op.hypervisor,
6742 ",".join(enabled_hvs)),
6745 # check hypervisor parameter syntax (locally)
6746 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6747 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6749 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6750 hv_type.CheckParameterSyntax(filled_hvp)
6751 self.hv_full = filled_hvp
6752 # check that we don't specify global parameters on an instance
6753 _CheckGlobalHvParams(self.op.hvparams)
6755 # fill and remember the beparams dict
6756 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6757 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6759 # build os parameters
6760 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6762 # now that hvp/bep are in final format, let's reset to defaults,
6764 if self.op.identify_defaults:
6765 self._RevertToDefaults(cluster)
6769 for idx, nic in enumerate(self.op.nics):
6770 nic_mode_req = nic.get("mode", None)
6771 nic_mode = nic_mode_req
6772 if nic_mode is None:
6773 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6775 # in routed mode, for the first nic, the default ip is 'auto'
6776 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6777 default_ip_mode = constants.VALUE_AUTO
6779 default_ip_mode = constants.VALUE_NONE
6781 # ip validity checks
6782 ip = nic.get("ip", default_ip_mode)
6783 if ip is None or ip.lower() == constants.VALUE_NONE:
6785 elif ip.lower() == constants.VALUE_AUTO:
6786 if not self.op.name_check:
6787 raise errors.OpPrereqError("IP address set to auto but name checks"
6788 " have been skipped. Aborting.",
6790 nic_ip = self.hostname1.ip
6792 if not utils.IsValidIP(ip):
6793 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6794 " like a valid IP" % ip,
6798 # TODO: check the ip address for uniqueness
6799 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6800 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6803 # MAC address verification
6804 mac = nic.get("mac", constants.VALUE_AUTO)
6805 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6806 mac = utils.NormalizeAndValidateMac(mac)
6809 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6810 except errors.ReservationError:
6811 raise errors.OpPrereqError("MAC address %s already in use"
6812 " in cluster" % mac,
6813 errors.ECODE_NOTUNIQUE)
6815 # bridge verification
6816 bridge = nic.get("bridge", None)
6817 link = nic.get("link", None)
6819 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6820 " at the same time", errors.ECODE_INVAL)
6821 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6822 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6829 nicparams[constants.NIC_MODE] = nic_mode_req
6831 nicparams[constants.NIC_LINK] = link
6833 check_params = cluster.SimpleFillNIC(nicparams)
6834 objects.NIC.CheckParameterSyntax(check_params)
6835 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6837 # disk checks/pre-build
6839 for disk in self.op.disks:
6840 mode = disk.get("mode", constants.DISK_RDWR)
6841 if mode not in constants.DISK_ACCESS_SET:
6842 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6843 mode, errors.ECODE_INVAL)
6844 size = disk.get("size", None)
6846 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6849 except (TypeError, ValueError):
6850 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6852 new_disk = {"size": size, "mode": mode}
6854 new_disk["adopt"] = disk["adopt"]
6855 self.disks.append(new_disk)
6857 if self.op.mode == constants.INSTANCE_IMPORT:
6859 # Check that the new instance doesn't have fewer disks than the export
6860 instance_disks = len(self.disks)
6861 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6862 if instance_disks < export_disks:
6863 raise errors.OpPrereqError("Not enough disks to import."
6864 " (instance: %d, export: %d)" %
6865 (instance_disks, export_disks),
6869 for idx in range(export_disks):
6870 option = 'disk%d_dump' % idx
6871 if export_info.has_option(constants.INISECT_INS, option):
6872 # FIXME: are the old os-es, disk sizes, etc. useful?
6873 export_name = export_info.get(constants.INISECT_INS, option)
6874 image = utils.PathJoin(self.op.src_path, export_name)
6875 disk_images.append(image)
6877 disk_images.append(False)
6879 self.src_images = disk_images
6881 old_name = export_info.get(constants.INISECT_INS, 'name')
6883 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6884 except (TypeError, ValueError), err:
6885 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6886 " an integer: %s" % str(err),
6888 if self.op.instance_name == old_name:
6889 for idx, nic in enumerate(self.nics):
6890 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6891 nic_mac_ini = 'nic%d_mac' % idx
6892 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6894 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6896 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6897 if self.op.ip_check:
6898 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6899 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6900 (self.check_ip, self.op.instance_name),
6901 errors.ECODE_NOTUNIQUE)
6903 #### mac address generation
6904 # By generating the mac address here, both the allocator and the hooks get
6905 # the real, final mac address rather than the 'auto' or 'generate' value.
6906 # There is a race condition between the generation and the instance object
6907 # creation, which means that we know the mac is valid now, but we're not
6908 # sure it will be when we actually add the instance. If things go bad
6909 # adding the instance will abort because of a duplicate mac, and the
6910 # creation job will fail.
6911 for nic in self.nics:
6912 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6913 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6917 if self.op.iallocator is not None:
6918 self._RunAllocator()
6920 #### node related checks
6922 # check primary node
6923 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6924 assert self.pnode is not None, \
6925 "Cannot retrieve locked node %s" % self.op.pnode
6927 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6928 pnode.name, errors.ECODE_STATE)
6930 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6931 pnode.name, errors.ECODE_STATE)
6933 self.secondaries = []
6935 # mirror node verification
6936 if self.op.disk_template in constants.DTS_NET_MIRROR:
6937 if self.op.snode is None:
6938 raise errors.OpPrereqError("The networked disk templates need"
6939 " a mirror node", errors.ECODE_INVAL)
6940 if self.op.snode == pnode.name:
6941 raise errors.OpPrereqError("The secondary node cannot be the"
6942 " primary node.", errors.ECODE_INVAL)
6943 _CheckNodeOnline(self, self.op.snode)
6944 _CheckNodeNotDrained(self, self.op.snode)
6945 self.secondaries.append(self.op.snode)
6947 nodenames = [pnode.name] + self.secondaries
6949 req_size = _ComputeDiskSize(self.op.disk_template,
6952 # Check lv size requirements, if not adopting
6953 if req_size is not None and not self.adopt_disks:
6954 _CheckNodesFreeDisk(self, nodenames, req_size)
6956 if self.adopt_disks: # instead, we must check the adoption data
6957 all_lvs = set([i["adopt"] for i in self.disks])
6958 if len(all_lvs) != len(self.disks):
6959 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6961 for lv_name in all_lvs:
6963 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6964 except errors.ReservationError:
6965 raise errors.OpPrereqError("LV named %s used by another instance" %
6966 lv_name, errors.ECODE_NOTUNIQUE)
6968 node_lvs = self.rpc.call_lv_list([pnode.name],
6969 self.cfg.GetVGName())[pnode.name]
6970 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6971 node_lvs = node_lvs.payload
6972 delta = all_lvs.difference(node_lvs.keys())
6974 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6975 utils.CommaJoin(delta),
6977 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6979 raise errors.OpPrereqError("Online logical volumes found, cannot"
6980 " adopt: %s" % utils.CommaJoin(online_lvs),
6982 # update the disk sizes based on what was found
6983 for dsk in self.disks:
6984 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6986 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6988 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6989 # check OS parameters (remotely)
6990 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
6992 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6994 # memory check on primary node
6996 _CheckNodeFreeMemory(self, self.pnode.name,
6997 "creating instance %s" % self.op.instance_name,
6998 self.be_full[constants.BE_MEMORY],
7001 self.dry_run_result = list(nodenames)
7003 def Exec(self, feedback_fn):
7004 """Create and add the instance to the cluster.
7007 instance = self.op.instance_name
7008 pnode_name = self.pnode.name
7010 ht_kind = self.op.hypervisor
7011 if ht_kind in constants.HTS_REQ_PORT:
7012 network_port = self.cfg.AllocatePort()
7016 if constants.ENABLE_FILE_STORAGE:
7017 # this is needed because os.path.join does not accept None arguments
7018 if self.op.file_storage_dir is None:
7019 string_file_storage_dir = ""
7021 string_file_storage_dir = self.op.file_storage_dir
7023 # build the full file storage dir path
7024 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7025 string_file_storage_dir, instance)
7027 file_storage_dir = ""
7029 disks = _GenerateDiskTemplate(self,
7030 self.op.disk_template,
7031 instance, pnode_name,
7035 self.op.file_driver,
7038 iobj = objects.Instance(name=instance, os=self.op.os_type,
7039 primary_node=pnode_name,
7040 nics=self.nics, disks=disks,
7041 disk_template=self.op.disk_template,
7043 network_port=network_port,
7044 beparams=self.op.beparams,
7045 hvparams=self.op.hvparams,
7046 hypervisor=self.op.hypervisor,
7047 osparams=self.op.osparams,
7050 if self.adopt_disks:
7051 # rename LVs to the newly-generated names; we need to construct
7052 # 'fake' LV disks with the old data, plus the new unique_id
7053 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7055 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7056 rename_to.append(t_dsk.logical_id)
7057 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7058 self.cfg.SetDiskID(t_dsk, pnode_name)
7059 result = self.rpc.call_blockdev_rename(pnode_name,
7060 zip(tmp_disks, rename_to))
7061 result.Raise("Failed to rename adopted LVs")
7063 feedback_fn("* creating instance disks...")
7065 _CreateDisks(self, iobj)
7066 except errors.OpExecError:
7067 self.LogWarning("Device creation failed, reverting...")
7069 _RemoveDisks(self, iobj)
7071 self.cfg.ReleaseDRBDMinors(instance)
7074 feedback_fn("adding instance %s to cluster config" % instance)
7076 self.cfg.AddInstance(iobj, self.proc.GetECId())
7078 # Declare that we don't want to remove the instance lock anymore, as we've
7079 # added the instance to the config
7080 del self.remove_locks[locking.LEVEL_INSTANCE]
7081 # Unlock all the nodes
7082 if self.op.mode == constants.INSTANCE_IMPORT:
7083 nodes_keep = [self.op.src_node]
7084 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7085 if node != self.op.src_node]
7086 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7087 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7089 self.context.glm.release(locking.LEVEL_NODE)
7090 del self.acquired_locks[locking.LEVEL_NODE]
7092 if self.op.wait_for_sync:
7093 disk_abort = not _WaitForSync(self, iobj)
7094 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7095 # make sure the disks are not degraded (still sync-ing is ok)
7097 feedback_fn("* checking mirrors status")
7098 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7103 _RemoveDisks(self, iobj)
7104 self.cfg.RemoveInstance(iobj.name)
7105 # Make sure the instance lock gets removed
7106 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7107 raise errors.OpExecError("There are some degraded disks for"
7110 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7111 if self.op.mode == constants.INSTANCE_CREATE:
7112 if not self.op.no_install:
7113 feedback_fn("* running the instance OS create scripts...")
7114 # FIXME: pass debug option from opcode to backend
7115 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7116 self.op.debug_level)
7117 result.Raise("Could not add os for instance %s"
7118 " on node %s" % (instance, pnode_name))
7120 elif self.op.mode == constants.INSTANCE_IMPORT:
7121 feedback_fn("* running the instance OS import scripts...")
7125 for idx, image in enumerate(self.src_images):
7129 # FIXME: pass debug option from opcode to backend
7130 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7131 constants.IEIO_FILE, (image, ),
7132 constants.IEIO_SCRIPT,
7133 (iobj.disks[idx], idx),
7135 transfers.append(dt)
7138 masterd.instance.TransferInstanceData(self, feedback_fn,
7139 self.op.src_node, pnode_name,
7140 self.pnode.secondary_ip,
7142 if not compat.all(import_result):
7143 self.LogWarning("Some disks for instance %s on node %s were not"
7144 " imported successfully" % (instance, pnode_name))
7146 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7147 feedback_fn("* preparing remote import...")
7148 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7149 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7151 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7152 self.source_x509_ca,
7153 self._cds, timeouts)
7154 if not compat.all(disk_results):
7155 # TODO: Should the instance still be started, even if some disks
7156 # failed to import (valid for local imports, too)?
7157 self.LogWarning("Some disks for instance %s on node %s were not"
7158 " imported successfully" % (instance, pnode_name))
7160 # Run rename script on newly imported instance
7161 assert iobj.name == instance
7162 feedback_fn("Running rename script for %s" % instance)
7163 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7164 self.source_instance_name,
7165 self.op.debug_level)
7167 self.LogWarning("Failed to run rename script for %s on node"
7168 " %s: %s" % (instance, pnode_name, result.fail_msg))
7171 # also checked in the prereq part
7172 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7176 iobj.admin_up = True
7177 self.cfg.Update(iobj, feedback_fn)
7178 logging.info("Starting instance %s on node %s", instance, pnode_name)
7179 feedback_fn("* starting instance...")
7180 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7181 result.Raise("Could not start instance")
7183 return list(iobj.all_nodes)
7186 class LUConnectConsole(NoHooksLU):
7187 """Connect to an instance's console.
7189 This is somewhat special in that it returns the command line that
7190 you need to run on the master node in order to connect to the
7194 _OP_REQP = ["instance_name"]
7197 def ExpandNames(self):
7198 self._ExpandAndLockInstance()
7200 def CheckPrereq(self):
7201 """Check prerequisites.
7203 This checks that the instance is in the cluster.
7206 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7207 assert self.instance is not None, \
7208 "Cannot retrieve locked instance %s" % self.op.instance_name
7209 _CheckNodeOnline(self, self.instance.primary_node)
7211 def Exec(self, feedback_fn):
7212 """Connect to the console of an instance
7215 instance = self.instance
7216 node = instance.primary_node
7218 node_insts = self.rpc.call_instance_list([node],
7219 [instance.hypervisor])[node]
7220 node_insts.Raise("Can't get node information from %s" % node)
7222 if instance.name not in node_insts.payload:
7223 raise errors.OpExecError("Instance %s is not running." % instance.name)
7225 logging.debug("Connecting to console of %s on %s", instance.name, node)
7227 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7228 cluster = self.cfg.GetClusterInfo()
7229 # beparams and hvparams are passed separately, to avoid editing the
7230 # instance and then saving the defaults in the instance itself.
7231 hvparams = cluster.FillHV(instance)
7232 beparams = cluster.FillBE(instance)
7233 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7236 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7239 class LUReplaceDisks(LogicalUnit):
7240 """Replace the disks of an instance.
7243 HPATH = "mirrors-replace"
7244 HTYPE = constants.HTYPE_INSTANCE
7245 _OP_REQP = ["instance_name", "mode", "disks"]
7247 ("remote_node", None),
7248 ("iallocator", None),
7249 ("early_release", None),
7253 def CheckArguments(self):
7254 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7257 def ExpandNames(self):
7258 self._ExpandAndLockInstance()
7260 if self.op.iallocator is not None:
7261 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7263 elif self.op.remote_node is not None:
7264 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7265 self.op.remote_node = remote_node
7267 # Warning: do not remove the locking of the new secondary here
7268 # unless DRBD8.AddChildren is changed to work in parallel;
7269 # currently it doesn't since parallel invocations of
7270 # FindUnusedMinor will conflict
7271 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7272 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7275 self.needed_locks[locking.LEVEL_NODE] = []
7276 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7278 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7279 self.op.iallocator, self.op.remote_node,
7280 self.op.disks, False, self.op.early_release)
7282 self.tasklets = [self.replacer]
7284 def DeclareLocks(self, level):
7285 # If we're not already locking all nodes in the set we have to declare the
7286 # instance's primary/secondary nodes.
7287 if (level == locking.LEVEL_NODE and
7288 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7289 self._LockInstancesNodes()
7291 def BuildHooksEnv(self):
7294 This runs on the master, the primary and all the secondaries.
7297 instance = self.replacer.instance
7299 "MODE": self.op.mode,
7300 "NEW_SECONDARY": self.op.remote_node,
7301 "OLD_SECONDARY": instance.secondary_nodes[0],
7303 env.update(_BuildInstanceHookEnvByObject(self, instance))
7305 self.cfg.GetMasterNode(),
7306 instance.primary_node,
7308 if self.op.remote_node is not None:
7309 nl.append(self.op.remote_node)
7313 class LUEvacuateNode(LogicalUnit):
7314 """Relocate the secondary instances from a node.
7317 HPATH = "node-evacuate"
7318 HTYPE = constants.HTYPE_NODE
7319 _OP_REQP = ["node_name"]
7321 ("remote_node", None),
7322 ("iallocator", None),
7323 ("early_release", False),
7327 def CheckArguments(self):
7328 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7329 self.op.remote_node,
7332 def ExpandNames(self):
7333 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7335 self.needed_locks = {}
7337 # Declare node locks
7338 if self.op.iallocator is not None:
7339 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7341 elif self.op.remote_node is not None:
7342 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7344 # Warning: do not remove the locking of the new secondary here
7345 # unless DRBD8.AddChildren is changed to work in parallel;
7346 # currently it doesn't since parallel invocations of
7347 # FindUnusedMinor will conflict
7348 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7349 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7352 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7354 # Create tasklets for replacing disks for all secondary instances on this
7359 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7360 logging.debug("Replacing disks for instance %s", inst.name)
7361 names.append(inst.name)
7363 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7364 self.op.iallocator, self.op.remote_node, [],
7365 True, self.op.early_release)
7366 tasklets.append(replacer)
7368 self.tasklets = tasklets
7369 self.instance_names = names
7371 # Declare instance locks
7372 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7374 def DeclareLocks(self, level):
7375 # If we're not already locking all nodes in the set we have to declare the
7376 # instance's primary/secondary nodes.
7377 if (level == locking.LEVEL_NODE and
7378 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7379 self._LockInstancesNodes()
7381 def BuildHooksEnv(self):
7384 This runs on the master, the primary and all the secondaries.
7388 "NODE_NAME": self.op.node_name,
7391 nl = [self.cfg.GetMasterNode()]
7393 if self.op.remote_node is not None:
7394 env["NEW_SECONDARY"] = self.op.remote_node
7395 nl.append(self.op.remote_node)
7397 return (env, nl, nl)
7400 class TLReplaceDisks(Tasklet):
7401 """Replaces disks for an instance.
7403 Note: Locking is not within the scope of this class.
7406 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7407 disks, delay_iallocator, early_release):
7408 """Initializes this class.
7411 Tasklet.__init__(self, lu)
7414 self.instance_name = instance_name
7416 self.iallocator_name = iallocator_name
7417 self.remote_node = remote_node
7419 self.delay_iallocator = delay_iallocator
7420 self.early_release = early_release
7423 self.instance = None
7424 self.new_node = None
7425 self.target_node = None
7426 self.other_node = None
7427 self.remote_node_info = None
7428 self.node_secondary_ip = None
7431 def CheckArguments(mode, remote_node, iallocator):
7432 """Helper function for users of this class.
7435 # check for valid parameter combination
7436 if mode == constants.REPLACE_DISK_CHG:
7437 if remote_node is None and iallocator is None:
7438 raise errors.OpPrereqError("When changing the secondary either an"
7439 " iallocator script must be used or the"
7440 " new node given", errors.ECODE_INVAL)
7442 if remote_node is not None and iallocator is not None:
7443 raise errors.OpPrereqError("Give either the iallocator or the new"
7444 " secondary, not both", errors.ECODE_INVAL)
7446 elif remote_node is not None or iallocator is not None:
7447 # Not replacing the secondary
7448 raise errors.OpPrereqError("The iallocator and new node options can"
7449 " only be used when changing the"
7450 " secondary node", errors.ECODE_INVAL)
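# Illustrative usage (hypothetical allocator name, added commentary):
#   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")
# is valid (secondary change via an allocator), while giving both a remote
# node and an allocator, or giving either of them for a primary/secondary
# disk-only replacement, raises OpPrereqError.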
7453 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7454 """Compute a new secondary node using an IAllocator.
7457 ial = IAllocator(lu.cfg, lu.rpc,
7458 mode=constants.IALLOCATOR_MODE_RELOC,
7460 relocate_from=relocate_from)
7462 ial.Run(iallocator_name)
7465 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7466 " %s" % (iallocator_name, ial.info),
7469 if len(ial.result) != ial.required_nodes:
7470 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7471 " of nodes (%s), required %s" %
7473 len(ial.result), ial.required_nodes),
7476 remote_node_name = ial.result[0]
7478 lu.LogInfo("Selected new secondary for instance '%s': %s",
7479 instance_name, remote_node_name)
7481 return remote_node_name
7483 def _FindFaultyDisks(self, node_name):
7484 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7487 def CheckPrereq(self):
7488 """Check prerequisites.
7490 This checks that the instance is in the cluster.
7493 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7494 assert instance is not None, \
7495 "Cannot retrieve locked instance %s" % self.instance_name
7497 if instance.disk_template != constants.DT_DRBD8:
7498 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7499 " instances", errors.ECODE_INVAL)
7501 if len(instance.secondary_nodes) != 1:
7502 raise errors.OpPrereqError("The instance has a strange layout,"
7503 " expected one secondary but found %d" %
7504 len(instance.secondary_nodes),
7507 if not self.delay_iallocator:
7508 self._CheckPrereq2()
7510 def _CheckPrereq2(self):
7511 """Check prerequisites, second part.
7513 This function should always be part of CheckPrereq. It was separated and is
7514 now called from Exec because during node evacuation the iallocator was only
7515 called with an unmodified cluster model, not taking planned changes into
7519 instance = self.instance
7520 secondary_node = instance.secondary_nodes[0]
7522 if self.iallocator_name is None:
7523 remote_node = self.remote_node
7525 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7526 instance.name, instance.secondary_nodes)
7528 if remote_node is not None:
7529 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7530 assert self.remote_node_info is not None, \
7531 "Cannot retrieve locked node %s" % remote_node
7533 self.remote_node_info = None
7535 if remote_node == self.instance.primary_node:
7536 raise errors.OpPrereqError("The specified node is the primary node of"
7537 " the instance.", errors.ECODE_INVAL)
7539 if remote_node == secondary_node:
7540 raise errors.OpPrereqError("The specified node is already the"
7541 " secondary node of the instance.",
7544 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7545 constants.REPLACE_DISK_CHG):
7546 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7549 if self.mode == constants.REPLACE_DISK_AUTO:
7550 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7551 faulty_secondary = self._FindFaultyDisks(secondary_node)
7553 if faulty_primary and faulty_secondary:
7554 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7555 " one node and cannot be repaired"
7556 " automatically" % self.instance_name,
7560 self.disks = faulty_primary
7561 self.target_node = instance.primary_node
7562 self.other_node = secondary_node
7563 check_nodes = [self.target_node, self.other_node]
7564 elif faulty_secondary:
7565 self.disks = faulty_secondary
7566 self.target_node = secondary_node
7567 self.other_node = instance.primary_node
7568 check_nodes = [self.target_node, self.other_node]
7574 # Non-automatic modes
7575 if self.mode == constants.REPLACE_DISK_PRI:
7576 self.target_node = instance.primary_node
7577 self.other_node = secondary_node
7578 check_nodes = [self.target_node, self.other_node]
7580 elif self.mode == constants.REPLACE_DISK_SEC:
7581 self.target_node = secondary_node
7582 self.other_node = instance.primary_node
7583 check_nodes = [self.target_node, self.other_node]
7585 elif self.mode == constants.REPLACE_DISK_CHG:
7586 self.new_node = remote_node
7587 self.other_node = instance.primary_node
7588 self.target_node = secondary_node
7589 check_nodes = [self.new_node, self.other_node]
7591 _CheckNodeNotDrained(self.lu, remote_node)
7593 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7594 assert old_node_info is not None
7595 if old_node_info.offline and not self.early_release:
7596 # doesn't make sense to delay the release
7597 self.early_release = True
7598 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7599 " early-release mode", secondary_node)
7602 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7605 # If not specified, all disks should be replaced
7606 if not self.disks:
7607 self.disks = range(len(self.instance.disks))
7609 for node in check_nodes:
7610 _CheckNodeOnline(self.lu, node)
7612 # Check whether disks are valid
7613 for disk_idx in self.disks:
7614 instance.FindDisk(disk_idx)
7616 # Get secondary node IP addresses
7617 node_2nd_ip = {}
7619 for node_name in [self.target_node, self.other_node, self.new_node]:
7620 if node_name is not None:
7621 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7623 self.node_secondary_ip = node_2nd_ip
7625 def Exec(self, feedback_fn):
7626 """Execute disk replacement.
7628 This dispatches the disk replacement to the appropriate handler.
7631 if self.delay_iallocator:
7632 self._CheckPrereq2()
7635 feedback_fn("No disks need replacement")
7638 feedback_fn("Replacing disk(s) %s for %s" %
7639 (utils.CommaJoin(self.disks), self.instance.name))
7641 activate_disks = (not self.instance.admin_up)
7643 # Activate the instance disks if we're replacing them on a down instance
7645 _StartInstanceDisks(self.lu, self.instance, True)
7648 # Should we replace the secondary node?
7649 if self.new_node is not None:
7650 fn = self._ExecDrbd8Secondary
7652 fn = self._ExecDrbd8DiskOnly
7654 return fn(feedback_fn)
7657 # Deactivate the instance disks if we're replacing them on a
7658 # down instance
7660 _SafeShutdownInstanceDisks(self.lu, self.instance)
7662 def _CheckVolumeGroup(self, nodes):
7663 self.lu.LogInfo("Checking volume groups")
7665 vgname = self.cfg.GetVGName()
7667 # Make sure volume group exists on all involved nodes
7668 results = self.rpc.call_vg_list(nodes)
7670 raise errors.OpExecError("Can't list volume groups on the nodes")
7674 res.Raise("Error checking node %s" % node)
7675 if vgname not in res.payload:
7676 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7679 def _CheckDisksExistence(self, nodes):
7680 # Check disk existence
7681 for idx, dev in enumerate(self.instance.disks):
7682 if idx not in self.disks:
7686 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7687 self.cfg.SetDiskID(dev, node)
7689 result = self.rpc.call_blockdev_find(node, dev)
7691 msg = result.fail_msg
7692 if msg or not result.payload:
7694 msg = "disk not found"
7695 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7698 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7699 for idx, dev in enumerate(self.instance.disks):
7700 if idx not in self.disks:
7703 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7706 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7708 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7709 " replace disks for instance %s" %
7710 (node_name, self.instance.name))
7712 def _CreateNewStorage(self, node_name):
7713 vgname = self.cfg.GetVGName()
7714 iv_names = {}
7716 for idx, dev in enumerate(self.instance.disks):
7717 if idx not in self.disks:
7720 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7722 self.cfg.SetDiskID(dev, node_name)
7724 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7725 names = _GenerateUniqueNames(self.lu, lv_names)
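# Note: _GenerateUniqueNames turns these suffixes into cluster-unique LV names
# (typically by prefixing a generated unique ID), so the new volumes end up
# named along the lines of "<uuid>.disk0_data" / "<uuid>.disk0_meta"
# (illustrative names only).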
7727 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7728 logical_id=(vgname, names[0]))
7729 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7730 logical_id=(vgname, names[1]))
7732 new_lvs = [lv_data, lv_meta]
7733 old_lvs = dev.children
7734 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7736 # we pass force_create=True to force the LVM creation
7737 for new_lv in new_lvs:
7738 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7739 _GetInstanceInfoText(self.instance), False)
7741 return iv_names
7743 def _CheckDevices(self, node_name, iv_names):
7744 for name, (dev, _, _) in iv_names.iteritems():
7745 self.cfg.SetDiskID(dev, node_name)
7747 result = self.rpc.call_blockdev_find(node_name, dev)
7749 msg = result.fail_msg
7750 if msg or not result.payload:
7752 msg = "disk not found"
7753 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7756 if result.payload.is_degraded:
7757 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7759 def _RemoveOldStorage(self, node_name, iv_names):
7760 for name, (_, old_lvs, _) in iv_names.iteritems():
7761 self.lu.LogInfo("Remove logical volumes for %s" % name)
7764 self.cfg.SetDiskID(lv, node_name)
7766 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7768 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7769 hint="remove unused LVs manually")
7771 def _ReleaseNodeLock(self, node_name):
7772 """Releases the lock for a given node."""
7773 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7775 def _ExecDrbd8DiskOnly(self, feedback_fn):
7776 """Replace a disk on the primary or secondary for DRBD 8.
7778 The algorithm for replace is quite complicated:
7780 1. for each disk to be replaced:
7782 1. create new LVs on the target node with unique names
7783 1. detach old LVs from the drbd device
7784 1. rename old LVs to name_replaced.<time_t>
7785 1. rename new LVs to old LVs
7786 1. attach the new LVs (with the old names now) to the drbd device
7788 1. wait for sync across all devices
7790 1. for each modified disk:
7792 1. remove old LVs (which have the name name_replaced.<time_t>)
7794 Failures are not very well handled.
7797 steps_total = 6
7799 # Step: check device activation
7800 self.lu.LogStep(1, steps_total, "Check device existence")
7801 self._CheckDisksExistence([self.other_node, self.target_node])
7802 self._CheckVolumeGroup([self.target_node, self.other_node])
7804 # Step: check other node consistency
7805 self.lu.LogStep(2, steps_total, "Check peer consistency")
7806 self._CheckDisksConsistency(self.other_node,
7807 self.other_node == self.instance.primary_node,
7810 # Step: create new storage
7811 self.lu.LogStep(3, steps_total, "Allocate new storage")
7812 iv_names = self._CreateNewStorage(self.target_node)
7814 # Step: for each lv, detach+rename*2+attach
7815 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7816 for dev, old_lvs, new_lvs in iv_names.itervalues():
7817 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7819 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7821 result.Raise("Can't detach drbd from local storage on node"
7822 " %s for device %s" % (self.target_node, dev.iv_name))
7824 #cfg.Update(instance)
7826 # ok, we created the new LVs, so now we know we have the needed
7827 # storage; as such, we proceed on the target node to rename
7828 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7829 # using the assumption that logical_id == physical_id (which in
7830 # turn is the unique_id on that node)
7832 # FIXME(iustin): use a better name for the replaced LVs
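# Illustration of the rename dance (hypothetical names): an old data LV
# "xenvg/<uuid>.disk0_data" is first renamed to
# "xenvg/<uuid>.disk0_data_replaced-<timestamp>" via ren_fn below, and the
# freshly created LV is then renamed to the original name before being
# re-attached to the DRBD device.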
7833 temp_suffix = int(time.time())
7834 ren_fn = lambda d, suff: (d.physical_id[0],
7835 d.physical_id[1] + "_replaced-%s" % suff)
7837 # Build the rename list based on what LVs exist on the node
7838 rename_old_to_new = []
7839 for to_ren in old_lvs:
7840 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7841 if not result.fail_msg and result.payload:
7843 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7845 self.lu.LogInfo("Renaming the old LVs on the target node")
7846 result = self.rpc.call_blockdev_rename(self.target_node,
7848 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7850 # Now we rename the new LVs to the old LVs
7851 self.lu.LogInfo("Renaming the new LVs on the target node")
7852 rename_new_to_old = [(new, old.physical_id)
7853 for old, new in zip(old_lvs, new_lvs)]
7854 result = self.rpc.call_blockdev_rename(self.target_node,
7856 result.Raise("Can't rename new LVs on node %s" % self.target_node)
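# At this point the on-disk swap is complete: the new LVs carry the original
# names and the old LVs carry the _replaced suffix; the config objects are
# updated below to match.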
7858 for old, new in zip(old_lvs, new_lvs):
7859 new.logical_id = old.logical_id
7860 self.cfg.SetDiskID(new, self.target_node)
7862 for disk in old_lvs:
7863 disk.logical_id = ren_fn(disk, temp_suffix)
7864 self.cfg.SetDiskID(disk, self.target_node)
7866 # Now that the new lvs have the old name, we can add them to the device
7867 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7868 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7870 msg = result.fail_msg
7872 for new_lv in new_lvs:
7873 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7876 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7877 hint=("cleanup manually the unused logical"
7879 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7881 dev.children = new_lvs
7883 self.cfg.Update(self.instance, feedback_fn)
7886 if self.early_release:
7887 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7889 self._RemoveOldStorage(self.target_node, iv_names)
7890 # WARNING: we release both node locks here, do not do other RPCs
7891 # than WaitForSync to the primary node
7892 self._ReleaseNodeLock([self.target_node, self.other_node])
7895 # This can fail as the old devices are degraded and _WaitForSync
7896 # does a combined result over all disks, so we don't check its return value
7897 self.lu.LogStep(cstep, steps_total, "Sync devices")
7899 _WaitForSync(self.lu, self.instance)
7901 # Check all devices manually
7902 self._CheckDevices(self.instance.primary_node, iv_names)
7904 # Step: remove old storage
7905 if not self.early_release:
7906 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7908 self._RemoveOldStorage(self.target_node, iv_names)
7910 def _ExecDrbd8Secondary(self, feedback_fn):
7911 """Replace the secondary node for DRBD 8.
7913 The algorithm for replace is quite complicated:
7914 - for all disks of the instance:
7915 - create new LVs on the new node with same names
7916 - shutdown the drbd device on the old secondary
7917 - disconnect the drbd network on the primary
7918 - create the drbd device on the new secondary
7919 - network attach the drbd on the primary, using an artifice:
7920 the drbd code for Attach() will connect to the network if it
7921 finds a device which is connected to the good local disks but
7922 not network enabled
7923 - wait for sync across all devices
7924 - remove all disks from the old secondary
7926 Failures are not very well handled.
7929 steps_total = 6
7931 # Step: check device activation
7932 self.lu.LogStep(1, steps_total, "Check device existence")
7933 self._CheckDisksExistence([self.instance.primary_node])
7934 self._CheckVolumeGroup([self.instance.primary_node])
7936 # Step: check other node consistency
7937 self.lu.LogStep(2, steps_total, "Check peer consistency")
7938 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7940 # Step: create new storage
7941 self.lu.LogStep(3, steps_total, "Allocate new storage")
7942 for idx, dev in enumerate(self.instance.disks):
7943 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7944 (self.new_node, idx))
7945 # we pass force_create=True to force LVM creation
7946 for new_lv in dev.children:
7947 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7948 _GetInstanceInfoText(self.instance), False)
7950 # Step 4: drbd minors and drbd setup changes
7951 # after this, we must manually remove the drbd minors on both the
7952 # error and the success paths
7953 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7954 minors = self.cfg.AllocateDRBDMinor([self.new_node
7955 for dev in self.instance.disks],
7957 logging.debug("Allocated minors %r", minors)
7960 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7961 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7962 (self.new_node, idx))
7963 # create new devices on new_node; note that we create two IDs:
7964 # one without port, so the drbd will be activated without
7965 # networking information on the new node at this stage, and one
7966 # with network, for the later activation in step 4
7967 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7968 if self.instance.primary_node == o_node1:
7969 p_minor = o_minor1
7970 else:
7971 assert self.instance.primary_node == o_node2, "Three-node instance?"
7972 p_minor = o_minor2
7974 new_alone_id = (self.instance.primary_node, self.new_node, None,
7975 p_minor, new_minor, o_secret)
7976 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7977 p_minor, new_minor, o_secret)
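# For DRBD8 the logical_id is the tuple (node_A, node_B, port, minor_A,
# minor_B, secret), as unpacked above; new_alone_id leaves the port as None so
# the device is first brought up standalone, while new_net_id carries the port
# for the later network attach.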
7979 iv_names[idx] = (dev, dev.children, new_net_id)
7980 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7982 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7983 logical_id=new_alone_id,
7984 children=dev.children,
7987 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7988 _GetInstanceInfoText(self.instance), False)
7989 except errors.GenericError:
7990 self.cfg.ReleaseDRBDMinors(self.instance.name)
7993 # We have new devices, shutdown the drbd on the old secondary
7994 for idx, dev in enumerate(self.instance.disks):
7995 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7996 self.cfg.SetDiskID(dev, self.target_node)
7997 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7998 if msg:
7999 self.lu.LogWarning("Failed to shut down drbd for disk/%d on old"
8000 " node: %s" % (idx, msg),
8001 hint=("Please cleanup this device manually as"
8002 " soon as possible"))
8004 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8005 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8006 self.node_secondary_ip,
8007 self.instance.disks)\
8008 [self.instance.primary_node]
8010 msg = result.fail_msg
8012 # detaches didn't succeed (unlikely)
8013 self.cfg.ReleaseDRBDMinors(self.instance.name)
8014 raise errors.OpExecError("Can't detach the disks from the network on"
8015 " old node: %s" % (msg,))
8017 # if we managed to detach at least one, we update all the disks of
8018 # the instance to point to the new secondary
8019 self.lu.LogInfo("Updating instance configuration")
8020 for dev, _, new_logical_id in iv_names.itervalues():
8021 dev.logical_id = new_logical_id
8022 self.cfg.SetDiskID(dev, self.instance.primary_node)
8024 self.cfg.Update(self.instance, feedback_fn)
8026 # and now perform the drbd attach
8027 self.lu.LogInfo("Attaching primary drbds to new secondary"
8028 " (standalone => connected)")
8029 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8031 self.node_secondary_ip,
8032 self.instance.disks,
8035 for to_node, to_result in result.items():
8036 msg = to_result.fail_msg
8038 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8040 hint=("please do a gnt-instance info to see the"
8041 " status of disks"))
8043 if self.early_release:
8044 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8046 self._RemoveOldStorage(self.target_node, iv_names)
8047 # WARNING: we release all node locks here, do not do other RPCs
8048 # than WaitForSync to the primary node
8049 self._ReleaseNodeLock([self.instance.primary_node,
8054 # This can fail as the old devices are degraded and _WaitForSync
8055 # does a combined result over all disks, so we don't check its return value
8056 self.lu.LogStep(cstep, steps_total, "Sync devices")
8058 _WaitForSync(self.lu, self.instance)
8060 # Check all devices manually
8061 self._CheckDevices(self.instance.primary_node, iv_names)
8063 # Step: remove old storage
8064 if not self.early_release:
8065 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8066 self._RemoveOldStorage(self.target_node, iv_names)
8069 class LURepairNodeStorage(NoHooksLU):
8070 """Repairs the volume group on a node.
8073 _OP_REQP = ["node_name"]
8076 def CheckArguments(self):
8077 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8079 _CheckStorageType(self.op.storage_type)
8081 def ExpandNames(self):
8082 self.needed_locks = {
8083 locking.LEVEL_NODE: [self.op.node_name],
8086 def _CheckFaultyDisks(self, instance, node_name):
8087 """Ensure faulty disks abort the opcode or at least warn."""
8089 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8091 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8092 " node '%s'" % (instance.name, node_name),
8094 except errors.OpPrereqError, err:
8095 if self.op.ignore_consistency:
8096 self.proc.LogWarning(str(err.args[0]))
8100 def CheckPrereq(self):
8101 """Check prerequisites.
8104 storage_type = self.op.storage_type
8106 if (constants.SO_FIX_CONSISTENCY not in
8107 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8108 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8109 " repaired" % storage_type,
8112 # Check whether any instance on this node has faulty disks
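# We check the peer nodes of every running instance on the node being
# repaired: if a peer already has faulty disks, repairing this node could
# touch the only good copy, so the opcode aborts (or only warns when
# ignore_consistency is set).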
8113 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8114 if not inst.admin_up:
8116 check_nodes = set(inst.all_nodes)
8117 check_nodes.discard(self.op.node_name)
8118 for inst_node_name in check_nodes:
8119 self._CheckFaultyDisks(inst, inst_node_name)
8121 def Exec(self, feedback_fn):
8122 feedback_fn("Repairing storage unit '%s' on %s ..." %
8123 (self.op.name, self.op.node_name))
8125 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8126 result = self.rpc.call_storage_execute(self.op.node_name,
8127 self.op.storage_type, st_args,
8129 constants.SO_FIX_CONSISTENCY)
8130 result.Raise("Failed to repair storage unit '%s' on %s" %
8131 (self.op.name, self.op.node_name))
8134 class LUNodeEvacuationStrategy(NoHooksLU):
8135 """Computes the node evacuation strategy.
8138 _OP_REQP = ["nodes"]
8140 ("remote_node", None),
8141 ("iallocator", None),
8145 def CheckArguments(self):
8146 if self.op.remote_node is not None and self.op.iallocator is not None:
8147 raise errors.OpPrereqError("Give either the iallocator or the new"
8148 " secondary, not both", errors.ECODE_INVAL)
8150 def ExpandNames(self):
8151 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8152 self.needed_locks = locks = {}
8153 if self.op.remote_node is None:
8154 locks[locking.LEVEL_NODE] = locking.ALL_SET
8156 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8157 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8159 def CheckPrereq(self):
8162 def Exec(self, feedback_fn):
8163 if self.op.remote_node is not None:
8165 for node in self.op.nodes:
8166 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8169 if i.primary_node == self.op.remote_node:
8170 raise errors.OpPrereqError("Node %s is the primary node of"
8171 " instance %s, cannot use it as"
8173 (self.op.remote_node, i.name),
8175 result.append([i.name, self.op.remote_node])
8177 ial = IAllocator(self.cfg, self.rpc,
8178 mode=constants.IALLOCATOR_MODE_MEVAC,
8179 evac_nodes=self.op.nodes)
8180 ial.Run(self.op.iallocator, validate=True)
8182 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8188 class LUGrowDisk(LogicalUnit):
8189 """Grow a disk of an instance.
8193 HTYPE = constants.HTYPE_INSTANCE
8194 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
8197 def ExpandNames(self):
8198 self._ExpandAndLockInstance()
8199 self.needed_locks[locking.LEVEL_NODE] = []
8200 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8202 def DeclareLocks(self, level):
8203 if level == locking.LEVEL_NODE:
8204 self._LockInstancesNodes()
8206 def BuildHooksEnv(self):
8209 This runs on the master, the primary and all the secondaries.
8213 "DISK": self.op.disk,
8214 "AMOUNT": self.op.amount,
8216 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8217 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8220 def CheckPrereq(self):
8221 """Check prerequisites.
8223 This checks that the instance is in the cluster.
8226 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8227 assert instance is not None, \
8228 "Cannot retrieve locked instance %s" % self.op.instance_name
8229 nodenames = list(instance.all_nodes)
8230 for node in nodenames:
8231 _CheckNodeOnline(self, node)
8234 self.instance = instance
8236 if instance.disk_template not in constants.DTS_GROWABLE:
8237 raise errors.OpPrereqError("Instance's disk layout does not support"
8238 " growing.", errors.ECODE_INVAL)
8240 self.disk = instance.FindDisk(self.op.disk)
8242 if instance.disk_template != constants.DT_FILE:
8243 # TODO: check the free disk space for file, when that feature will be
8244 # supported
8245 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8247 def Exec(self, feedback_fn):
8248 """Execute disk grow.
8251 instance = self.instance
8254 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8256 raise errors.OpExecError("Cannot activate block device to grow")
8258 for node in instance.all_nodes:
8259 self.cfg.SetDiskID(disk, node)
8260 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8261 result.Raise("Grow request failed to node %s" % node)
8263 # TODO: Rewrite code to work properly
8264 # DRBD goes into sync mode for a short amount of time after executing the
8265 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8266 # calling "resize" in sync mode fails. Sleeping for a short amount of
8267 # time is a work-around.
8268 time.sleep(5)
8270 disk.RecordGrow(self.op.amount)
8271 self.cfg.Update(instance, feedback_fn)
8272 if self.op.wait_for_sync:
8273 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8275 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8276 " status.\nPlease check the instance.")
8277 if not instance.admin_up:
8278 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8279 elif not instance.admin_up:
8280 self.proc.LogWarning("Not shutting down the disk even though the instance"
8281 " is not supposed to be running, because no wait"
8282 " for sync was requested.")
8285 class LUQueryInstanceData(NoHooksLU):
8286 """Query runtime instance data.
8289 _OP_REQP = ["instances", "static"]
8292 def CheckArguments(self):
8293 if not isinstance(self.op.instances, list):
8294 raise errors.OpPrereqError("Invalid argument type 'instances'",
8297 def ExpandNames(self):
8298 self.needed_locks = {}
8299 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8301 if self.op.instances:
8302 self.wanted_names = []
8303 for name in self.op.instances:
8304 full_name = _ExpandInstanceName(self.cfg, name)
8305 self.wanted_names.append(full_name)
8306 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8308 self.wanted_names = None
8309 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8311 self.needed_locks[locking.LEVEL_NODE] = []
8312 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8314 def DeclareLocks(self, level):
8315 if level == locking.LEVEL_NODE:
8316 self._LockInstancesNodes()
8318 def CheckPrereq(self):
8319 """Check prerequisites.
8321 This only checks the optional instance list against the existing names.
8324 if self.wanted_names is None:
8325 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8327 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8328 in self.wanted_names]
8331 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8332 """Returns the status of a block device
8335 if self.op.static or not node:
8338 self.cfg.SetDiskID(dev, node)
8340 result = self.rpc.call_blockdev_find(node, dev)
8344 result.Raise("Can't compute disk status for %s" % instance_name)
8346 status = result.payload
8350 return (status.dev_path, status.major, status.minor,
8351 status.sync_percent, status.estimated_time,
8352 status.is_degraded, status.ldisk_status)
8354 def _ComputeDiskStatus(self, instance, snode, dev):
8355 """Compute block device status.
8358 if dev.dev_type in constants.LDS_DRBD:
8359 # we change the snode then (otherwise we use the one passed in)
8360 if dev.logical_id[0] == instance.primary_node:
8361 snode = dev.logical_id[1]
8363 snode = dev.logical_id[0]
8365 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8367 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8370 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8371 for child in dev.children]
8376 "iv_name": dev.iv_name,
8377 "dev_type": dev.dev_type,
8378 "logical_id": dev.logical_id,
8379 "physical_id": dev.physical_id,
8380 "pstatus": dev_pstatus,
8381 "sstatus": dev_sstatus,
8382 "children": dev_children,
8389 def Exec(self, feedback_fn):
8390 """Gather and return data"""
8393 cluster = self.cfg.GetClusterInfo()
8395 for instance in self.wanted_instances:
8396 if not self.op.static:
8397 remote_info = self.rpc.call_instance_info(instance.primary_node,
8399 instance.hypervisor)
8400 remote_info.Raise("Error checking node %s" % instance.primary_node)
8401 remote_info = remote_info.payload
8402 if remote_info and "state" in remote_info:
8405 remote_state = "down"
8408 if instance.admin_up:
8411 config_state = "down"
8413 disks = [self._ComputeDiskStatus(instance, None, device)
8414 for device in instance.disks]
8417 "name": instance.name,
8418 "config_state": config_state,
8419 "run_state": remote_state,
8420 "pnode": instance.primary_node,
8421 "snodes": instance.secondary_nodes,
8423 # this happens to be the same format used for hooks
8424 "nics": _NICListToTuple(self, instance.nics),
8425 "disk_template": instance.disk_template,
8427 "hypervisor": instance.hypervisor,
8428 "network_port": instance.network_port,
8429 "hv_instance": instance.hvparams,
8430 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8431 "be_instance": instance.beparams,
8432 "be_actual": cluster.FillBE(instance),
8433 "os_instance": instance.osparams,
8434 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8435 "serial_no": instance.serial_no,
8436 "mtime": instance.mtime,
8437 "ctime": instance.ctime,
8438 "uuid": instance.uuid,
8441 result[instance.name] = idict
8446 class LUSetInstanceParams(LogicalUnit):
8447 """Modifies an instance's parameters.
8450 HPATH = "instance-modify"
8451 HTYPE = constants.HTYPE_INSTANCE
8452 _OP_REQP = ["instance_name"]
8454 ("nics", _EmptyList),
8455 ("disks", _EmptyList),
8456 ("beparams", _EmptyDict),
8457 ("hvparams", _EmptyDict),
8458 ("disk_template", None),
8459 ("remote_node", None),
8461 ("force_variant", False),
8467 def CheckArguments(self):
8468 if not (self.op.nics or self.op.disks or self.op.disk_template or
8469 self.op.hvparams or self.op.beparams or self.op.os_name):
8470 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8472 if self.op.hvparams:
8473 _CheckGlobalHvParams(self.op.hvparams)
8477 for disk_op, disk_dict in self.op.disks:
8478 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8479 if disk_op == constants.DDM_REMOVE:
8482 elif disk_op == constants.DDM_ADD:
8485 if not isinstance(disk_op, int):
8486 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8487 if not isinstance(disk_dict, dict):
8488 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8489 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8491 if disk_op == constants.DDM_ADD:
8492 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8493 if mode not in constants.DISK_ACCESS_SET:
8494 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8496 size = disk_dict.get('size', None)
8498 raise errors.OpPrereqError("Required disk parameter size missing",
8502 except (TypeError, ValueError), err:
8503 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8504 str(err), errors.ECODE_INVAL)
8505 disk_dict['size'] = size
8507 # modification of disk
8508 if 'size' in disk_dict:
8509 raise errors.OpPrereqError("Disk size change not possible, use"
8510 " grow-disk", errors.ECODE_INVAL)
8512 if disk_addremove > 1:
8513 raise errors.OpPrereqError("Only one disk add or remove operation"
8514 " supported at a time", errors.ECODE_INVAL)
8516 if self.op.disks and self.op.disk_template is not None:
8517 raise errors.OpPrereqError("Disk template conversion and other disk"
8518 " changes not supported at the same time",
8521 if self.op.disk_template:
8522 _CheckDiskTemplate(self.op.disk_template)
8523 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8524 self.op.remote_node is None):
8525 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8526 " one requires specifying a secondary node",
8531 for nic_op, nic_dict in self.op.nics:
8532 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8533 if nic_op == constants.DDM_REMOVE:
8536 elif nic_op == constants.DDM_ADD:
8539 if not isinstance(nic_op, int):
8540 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8541 if not isinstance(nic_dict, dict):
8542 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8543 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8545 # nic_dict should be a dict
8546 nic_ip = nic_dict.get('ip', None)
8547 if nic_ip is not None:
8548 if nic_ip.lower() == constants.VALUE_NONE:
8549 nic_dict['ip'] = None
8551 if not utils.IsValidIP(nic_ip):
8552 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8555 nic_bridge = nic_dict.get('bridge', None)
8556 nic_link = nic_dict.get('link', None)
8557 if nic_bridge and nic_link:
8558 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8559 " at the same time", errors.ECODE_INVAL)
8560 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8561 nic_dict['bridge'] = None
8562 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8563 nic_dict['link'] = None
8565 if nic_op == constants.DDM_ADD:
8566 nic_mac = nic_dict.get('mac', None)
8568 nic_dict['mac'] = constants.VALUE_AUTO
8570 if 'mac' in nic_dict:
8571 nic_mac = nic_dict['mac']
8572 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8573 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8575 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8576 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8577 " modifying an existing nic",
8580 if nic_addremove > 1:
8581 raise errors.OpPrereqError("Only one NIC add or remove operation"
8582 " supported at a time", errors.ECODE_INVAL)
8584 def ExpandNames(self):
8585 self._ExpandAndLockInstance()
8586 self.needed_locks[locking.LEVEL_NODE] = []
8587 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8589 def DeclareLocks(self, level):
8590 if level == locking.LEVEL_NODE:
8591 self._LockInstancesNodes()
8592 if self.op.disk_template and self.op.remote_node:
8593 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8594 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8596 def BuildHooksEnv(self):
8599 This runs on the master, primary and secondaries.
8603 if constants.BE_MEMORY in self.be_new:
8604 args['memory'] = self.be_new[constants.BE_MEMORY]
8605 if constants.BE_VCPUS in self.be_new:
8606 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8607 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8608 # information at all.
8611 nic_override = dict(self.op.nics)
8612 for idx, nic in enumerate(self.instance.nics):
8613 if idx in nic_override:
8614 this_nic_override = nic_override[idx]
8616 this_nic_override = {}
8617 if 'ip' in this_nic_override:
8618 ip = this_nic_override['ip']
8621 if 'mac' in this_nic_override:
8622 mac = this_nic_override['mac']
8625 if idx in self.nic_pnew:
8626 nicparams = self.nic_pnew[idx]
8628 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8629 mode = nicparams[constants.NIC_MODE]
8630 link = nicparams[constants.NIC_LINK]
8631 args['nics'].append((ip, mac, mode, link))
8632 if constants.DDM_ADD in nic_override:
8633 ip = nic_override[constants.DDM_ADD].get('ip', None)
8634 mac = nic_override[constants.DDM_ADD]['mac']
8635 nicparams = self.nic_pnew[constants.DDM_ADD]
8636 mode = nicparams[constants.NIC_MODE]
8637 link = nicparams[constants.NIC_LINK]
8638 args['nics'].append((ip, mac, mode, link))
8639 elif constants.DDM_REMOVE in nic_override:
8640 del args['nics'][-1]
8642 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8643 if self.op.disk_template:
8644 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8645 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8648 def CheckPrereq(self):
8649 """Check prerequisites.
8651 This only checks the instance list against the existing names.
8654 # checking the new params on the primary/secondary nodes
8656 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8657 cluster = self.cluster = self.cfg.GetClusterInfo()
8658 assert self.instance is not None, \
8659 "Cannot retrieve locked instance %s" % self.op.instance_name
8660 pnode = instance.primary_node
8661 nodelist = list(instance.all_nodes)
8664 if self.op.os_name and not self.op.force:
8665 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8666 self.op.force_variant)
8667 instance_os = self.op.os_name
8669 instance_os = instance.os
8671 if self.op.disk_template:
8672 if instance.disk_template == self.op.disk_template:
8673 raise errors.OpPrereqError("Instance already has disk template %s" %
8674 instance.disk_template, errors.ECODE_INVAL)
8676 if (instance.disk_template,
8677 self.op.disk_template) not in self._DISK_CONVERSIONS:
8678 raise errors.OpPrereqError("Unsupported disk template conversion from"
8679 " %s to %s" % (instance.disk_template,
8680 self.op.disk_template),
8682 if self.op.disk_template in constants.DTS_NET_MIRROR:
8683 _CheckNodeOnline(self, self.op.remote_node)
8684 _CheckNodeNotDrained(self, self.op.remote_node)
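# For a conversion to a mirrored template the future secondary must hold a
# full copy of every disk, so compute the required space from the current
# disk sizes and verify it is free on the remote node before continuing.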
8685 disks = [{"size": d.size} for d in instance.disks]
8686 required = _ComputeDiskSize(self.op.disk_template, disks)
8687 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8688 _CheckInstanceDown(self, instance, "cannot change disk template")
8690 # hvparams processing
8691 if self.op.hvparams:
8692 hv_type = instance.hypervisor
8693 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8694 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8695 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8698 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8699 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8700 self.hv_new = hv_new # the new actual values
8701 self.hv_inst = i_hvdict # the new dict (without defaults)
8703 self.hv_new = self.hv_inst = {}
8705 # beparams processing
8706 if self.op.beparams:
8707 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8709 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8710 be_new = cluster.SimpleFillBE(i_bedict)
8711 self.be_new = be_new # the new actual values
8712 self.be_inst = i_bedict # the new dict (without defaults)
8714 self.be_new = self.be_inst = {}
8716 # osparams processing
8717 if self.op.osparams:
8718 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8719 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8720 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8721 self.os_inst = i_osdict # the new dict (without defaults)
8723 self.os_new = self.os_inst = {}
8725 self.warn = []
8727 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8728 mem_check_list = [pnode]
8729 if be_new[constants.BE_AUTO_BALANCE]:
8730 # either we changed auto_balance to yes or it was from before
8731 mem_check_list.extend(instance.secondary_nodes)
8732 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8733 instance.hypervisor)
8734 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8735 instance.hypervisor)
8736 pninfo = nodeinfo[pnode]
8737 msg = pninfo.fail_msg
8739 # Assume the primary node is unreachable and go ahead
8740 self.warn.append("Can't get info from primary node %s: %s" %
8742 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8743 self.warn.append("Node data from primary node %s doesn't contain"
8744 " free memory information" % pnode)
8745 elif instance_info.fail_msg:
8746 self.warn.append("Can't get instance runtime information: %s" %
8747 instance_info.fail_msg)
8749 if instance_info.payload:
8750 current_mem = int(instance_info.payload['memory'])
8752 # Assume instance not running
8753 # (there is a slight race condition here, but it's not very probable,
8754 # and we have no other way to check)
8756 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8757 pninfo.payload['memory_free'])
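# A positive miss_mem means the primary cannot cover the requested memory
# increase even after accounting for what the instance currently uses, so
# the change is refused below (this whole check is skipped with --force).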
8759 raise errors.OpPrereqError("This change will prevent the instance"
8760 " from starting, due to %d MB of memory"
8761 " missing on its primary node" % miss_mem,
8764 if be_new[constants.BE_AUTO_BALANCE]:
8765 for node, nres in nodeinfo.items():
8766 if node not in instance.secondary_nodes:
8770 self.warn.append("Can't get info from secondary node %s: %s" %
8772 elif not isinstance(nres.payload.get('memory_free', None), int):
8773 self.warn.append("Secondary node %s didn't return free"
8774 " memory information" % node)
8775 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8776 self.warn.append("Not enough memory to failover instance to"
8777 " secondary node %s" % node)
8779 # NIC processing
8780 self.nic_pinst = {}
8781 self.nic_pnew = {}
8782 for nic_op, nic_dict in self.op.nics:
8783 if nic_op == constants.DDM_REMOVE:
8784 if not instance.nics:
8785 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8788 if nic_op != constants.DDM_ADD:
8790 if not instance.nics:
8791 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8792 " no NICs" % nic_op,
8794 if nic_op < 0 or nic_op >= len(instance.nics):
8795 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8797 (nic_op, len(instance.nics) - 1),
8799 old_nic_params = instance.nics[nic_op].nicparams
8800 old_nic_ip = instance.nics[nic_op].ip
8805 update_params_dict = dict([(key, nic_dict[key])
8806 for key in constants.NICS_PARAMETERS
8807 if key in nic_dict])
8809 if 'bridge' in nic_dict:
8810 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8812 new_nic_params = _GetUpdatedParams(old_nic_params,
8814 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8815 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8816 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8817 self.nic_pinst[nic_op] = new_nic_params
8818 self.nic_pnew[nic_op] = new_filled_nic_params
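# nic_pinst keeps only the per-instance overrides (what ends up stored in the
# configuration), while nic_pnew is the fully filled dict used for validation
# here and for the hook environment in BuildHooksEnv.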
8819 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8821 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8822 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8823 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8825 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8827 self.warn.append(msg)
8829 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8830 if new_nic_mode == constants.NIC_MODE_ROUTED:
8831 if 'ip' in nic_dict:
8832 nic_ip = nic_dict['ip']
8836 raise errors.OpPrereqError('Cannot set the nic ip to None'
8837 ' on a routed nic', errors.ECODE_INVAL)
8838 if 'mac' in nic_dict:
8839 nic_mac = nic_dict['mac']
8841 raise errors.OpPrereqError('Cannot set the nic mac to None',
8843 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8844 # otherwise generate the mac
8845 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8847 # or validate/reserve the current one
8849 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8850 except errors.ReservationError:
8851 raise errors.OpPrereqError("MAC address %s already in use"
8852 " in cluster" % nic_mac,
8853 errors.ECODE_NOTUNIQUE)
8856 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8857 raise errors.OpPrereqError("Disk operations not supported for"
8858 " diskless instances",
8860 for disk_op, _ in self.op.disks:
8861 if disk_op == constants.DDM_REMOVE:
8862 if len(instance.disks) == 1:
8863 raise errors.OpPrereqError("Cannot remove the last disk of"
8864 " an instance", errors.ECODE_INVAL)
8865 _CheckInstanceDown(self, instance, "cannot remove disks")
8867 if (disk_op == constants.DDM_ADD and
8868 len(instance.nics) >= constants.MAX_DISKS):
8869 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8870 " add more" % constants.MAX_DISKS,
8872 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8874 if disk_op < 0 or disk_op >= len(instance.disks):
8875 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8877 (disk_op, len(instance.disks)),
8882 def _ConvertPlainToDrbd(self, feedback_fn):
8883 """Converts an instance from plain to drbd.
8886 feedback_fn("Converting template to drbd")
8887 instance = self.instance
8888 pnode = instance.primary_node
8889 snode = self.op.remote_node
8891 # create a fake disk info for _GenerateDiskTemplate
8892 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8893 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8894 instance.name, pnode, [snode],
8895 disk_info, None, None, 0)
8896 info = _GetInstanceInfoText(instance)
8897 feedback_fn("Creating additional volumes...")
8898 # first, create the missing data and meta devices
8899 for disk in new_disks:
8900 # unfortunately this is... not too nice
8901 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8903 for child in disk.children:
8904 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8905 # at this stage, all new LVs have been created, we can rename the
8906 # old ones
8907 feedback_fn("Renaming original volumes...")
8908 rename_list = [(o, n.children[0].logical_id)
8909 for (o, n) in zip(instance.disks, new_disks)]
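# Each existing plain LV is renamed to the name generated for the data child
# of the corresponding new DRBD disk, so the instance's data is reused in
# place instead of being copied.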
8910 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8911 result.Raise("Failed to rename original LVs")
8913 feedback_fn("Initializing DRBD devices...")
8914 # all child devices are in place, we can now create the DRBD devices
8915 for disk in new_disks:
8916 for node in [pnode, snode]:
8917 f_create = node == pnode
8918 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8920 # at this point, the instance has been modified
8921 instance.disk_template = constants.DT_DRBD8
8922 instance.disks = new_disks
8923 self.cfg.Update(instance, feedback_fn)
8925 # disks are created, waiting for sync
8926 disk_abort = not _WaitForSync(self, instance)
8928 raise errors.OpExecError("There are some degraded disks for"
8929 " this instance, please cleanup manually")
8931 def _ConvertDrbdToPlain(self, feedback_fn):
8932 """Converts an instance from drbd to plain.
8935 instance = self.instance
8936 assert len(instance.secondary_nodes) == 1
8937 pnode = instance.primary_node
8938 snode = instance.secondary_nodes[0]
8939 feedback_fn("Converting template to plain")
8941 old_disks = instance.disks
8942 new_disks = [d.children[0] for d in old_disks]
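# For DRBD8 each disk has two LV children: children[0] is the data LV, which
# becomes the plain disk, and children[1] is the metadata LV, which is
# removed further down.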
8944 # copy over size and mode
8945 for parent, child in zip(old_disks, new_disks):
8946 child.size = parent.size
8947 child.mode = parent.mode
8949 # update instance structure
8950 instance.disks = new_disks
8951 instance.disk_template = constants.DT_PLAIN
8952 self.cfg.Update(instance, feedback_fn)
8954 feedback_fn("Removing volumes on the secondary node...")
8955 for disk in old_disks:
8956 self.cfg.SetDiskID(disk, snode)
8957 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8959 self.LogWarning("Could not remove block device %s on node %s,"
8960 " continuing anyway: %s", disk.iv_name, snode, msg)
8962 feedback_fn("Removing unneeded volumes on the primary node...")
8963 for idx, disk in enumerate(old_disks):
8964 meta = disk.children[1]
8965 self.cfg.SetDiskID(meta, pnode)
8966 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8968 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8969 " continuing anyway: %s", idx, pnode, msg)
8972 def Exec(self, feedback_fn):
8973 """Modifies an instance.
8975 All parameters take effect only at the next restart of the instance.
8978 # Process here the warnings from CheckPrereq, as we don't have a
8979 # feedback_fn there.
8980 for warn in self.warn:
8981 feedback_fn("WARNING: %s" % warn)
8983 result = []
8984 instance = self.instance
8985 # disk changes
8986 for disk_op, disk_dict in self.op.disks:
8987 if disk_op == constants.DDM_REMOVE:
8988 # remove the last disk
8989 device = instance.disks.pop()
8990 device_idx = len(instance.disks)
8991 for node, disk in device.ComputeNodeTree(instance.primary_node):
8992 self.cfg.SetDiskID(disk, node)
8993 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8995 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8996 " continuing anyway", device_idx, node, msg)
8997 result.append(("disk/%d" % device_idx, "remove"))
8998 elif disk_op == constants.DDM_ADD:
9000 if instance.disk_template == constants.DT_FILE:
9001 file_driver, file_path = instance.disks[0].logical_id
9002 file_path = os.path.dirname(file_path)
9004 file_driver = file_path = None
9005 disk_idx_base = len(instance.disks)
9006 new_disk = _GenerateDiskTemplate(self,
9007 instance.disk_template,
9008 instance.name, instance.primary_node,
9009 instance.secondary_nodes,
9014 instance.disks.append(new_disk)
9015 info = _GetInstanceInfoText(instance)
9017 logging.info("Creating volume %s for instance %s",
9018 new_disk.iv_name, instance.name)
9019 # Note: this needs to be kept in sync with _CreateDisks
9021 for node in instance.all_nodes:
9022 f_create = node == instance.primary_node
9024 _CreateBlockDev(self, node, instance, new_disk,
9025 f_create, info, f_create)
9026 except errors.OpExecError, err:
9027 self.LogWarning("Failed to create volume %s (%s) on"
9029 new_disk.iv_name, new_disk, node, err)
9030 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9031 (new_disk.size, new_disk.mode)))
9033 # change a given disk
9034 instance.disks[disk_op].mode = disk_dict['mode']
9035 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9037 if self.op.disk_template:
9038 r_shut = _ShutdownInstanceDisks(self, instance)
9040 raise errors.OpExecError("Cannot shut down instance disks, unable to"
9041 " proceed with disk template conversion")
9042 mode = (instance.disk_template, self.op.disk_template)
9044 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9046 self.cfg.ReleaseDRBDMinors(instance.name)
9048 result.append(("disk_template", self.op.disk_template))
9051 for nic_op, nic_dict in self.op.nics:
9052 if nic_op == constants.DDM_REMOVE:
9053 # remove the last nic
9054 del instance.nics[-1]
9055 result.append(("nic.%d" % len(instance.nics), "remove"))
9056 elif nic_op == constants.DDM_ADD:
9057 # mac and bridge should be set, by now
9058 mac = nic_dict['mac']
9059 ip = nic_dict.get('ip', None)
9060 nicparams = self.nic_pinst[constants.DDM_ADD]
9061 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9062 instance.nics.append(new_nic)
9063 result.append(("nic.%d" % (len(instance.nics) - 1),
9064 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9065 (new_nic.mac, new_nic.ip,
9066 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9067 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9070 for key in 'mac', 'ip':
9072 setattr(instance.nics[nic_op], key, nic_dict[key])
9073 if nic_op in self.nic_pinst:
9074 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9075 for key, val in nic_dict.iteritems():
9076 result.append(("nic.%s/%d" % (key, nic_op), val))
9079 if self.op.hvparams:
9080 instance.hvparams = self.hv_inst
9081 for key, val in self.op.hvparams.iteritems():
9082 result.append(("hv/%s" % key, val))
9085 if self.op.beparams:
9086 instance.beparams = self.be_inst
9087 for key, val in self.op.beparams.iteritems():
9088 result.append(("be/%s" % key, val))
9092 instance.os = self.op.os_name
9095 if self.op.osparams:
9096 instance.osparams = self.os_inst
9097 for key, val in self.op.osparams.iteritems():
9098 result.append(("os/%s" % key, val))
9100 self.cfg.Update(instance, feedback_fn)
9102 return result
9104 _DISK_CONVERSIONS = {
9105 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9106 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9110 class LUQueryExports(NoHooksLU):
9111 """Query the exports list
9114 _OP_REQP = ["nodes"]
9117 def ExpandNames(self):
9118 self.needed_locks = {}
9119 self.share_locks[locking.LEVEL_NODE] = 1
9120 if not self.op.nodes:
9121 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9123 self.needed_locks[locking.LEVEL_NODE] = \
9124 _GetWantedNodes(self, self.op.nodes)
9126 def CheckPrereq(self):
9127 """Check prerequisites.
9130 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9132 def Exec(self, feedback_fn):
9133 """Compute the list of all the exported system images.
9136 @return: a dictionary with the structure node->(export-list)
9137 where export-list is a list of the instances exported on
9138 that node.
9141 rpcresult = self.rpc.call_export_list(self.nodes)
9142 result = {}
9143 for node in rpcresult:
9144 if rpcresult[node].fail_msg:
9145 result[node] = False
9147 result[node] = rpcresult[node].payload
9152 class LUPrepareExport(NoHooksLU):
9153 """Prepares an instance for an export and returns useful information.
9156 _OP_REQP = ["instance_name", "mode"]
9159 def CheckArguments(self):
9160 """Check the arguments.
9163 if self.op.mode not in constants.EXPORT_MODES:
9164 raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
9167 def ExpandNames(self):
9168 self._ExpandAndLockInstance()
9170 def CheckPrereq(self):
9171 """Check prerequisites.
9174 instance_name = self.op.instance_name
9176 self.instance = self.cfg.GetInstanceInfo(instance_name)
9177 assert self.instance is not None, \
9178 "Cannot retrieve locked instance %s" % self.op.instance_name
9179 _CheckNodeOnline(self, self.instance.primary_node)
9181 self._cds = _GetClusterDomainSecret()
9183 def Exec(self, feedback_fn):
9184 """Prepares an instance for an export.
9187 instance = self.instance
9189 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9190 salt = utils.GenerateSecret(8)
9192 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9193 result = self.rpc.call_x509_cert_create(instance.primary_node,
9194 constants.RIE_CERT_VALIDITY)
9195 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9197 (name, cert_pem) = result.payload
9199 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9203 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9204 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9206 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9212 class LUExportInstance(LogicalUnit):
9213 """Export an instance to an image in the cluster.
9216 HPATH = "instance-export"
9217 HTYPE = constants.HTYPE_INSTANCE
9218 _OP_REQP = ["instance_name", "target_node", "shutdown"]
9220 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
9221 ("remove_instance", False),
9222 ("ignore_remove_failures", False),
9223 ("mode", constants.EXPORT_MODE_LOCAL),
9224 ("x509_key_name", None),
9225 ("destination_x509_ca", None),
9229 def CheckArguments(self):
9230 """Check the arguments.
9233 self.x509_key_name = self.op.x509_key_name
9234 self.dest_x509_ca_pem = self.op.destination_x509_ca
9236 if self.op.remove_instance and not self.op.shutdown:
9237 raise errors.OpPrereqError("Can not remove instance without shutting it"
9240 if self.op.mode not in constants.EXPORT_MODES:
9241 raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
9244 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9245 if not self.x509_key_name:
9246 raise errors.OpPrereqError("Missing X509 key name for encryption",
9249 if not self.dest_x509_ca_pem:
9250 raise errors.OpPrereqError("Missing destination X509 CA",
9253 def ExpandNames(self):
9254 self._ExpandAndLockInstance()
9256 # Lock all nodes for local exports
9257 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9258 # FIXME: lock only instance primary and destination node
9260 # Sad but true, for now we have to lock all nodes, as we don't know where
9261 # the previous export might be, and in this LU we search for it and
9262 # remove it from its current node. In the future we could fix this by:
9263 # - making a tasklet to search (share-lock all), then create the
9264 # new one, then one to remove the old export afterwards
9265 # - removing the removal operation altogether
9266 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9268 def DeclareLocks(self, level):
9269 """Last minute lock declaration."""
9270 # All nodes are locked anyway, so nothing to do here.
9272 def BuildHooksEnv(self):
9275 This will run on the master, primary node and target node.
9279 "EXPORT_MODE": self.op.mode,
9280 "EXPORT_NODE": self.op.target_node,
9281 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9282 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9283 # TODO: Generic function for boolean env variables
9284 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9287 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9289 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9291 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9292 nl.append(self.op.target_node)
9296 def CheckPrereq(self):
9297 """Check prerequisites.
9299 This checks that the instance and node names are valid.
9302 instance_name = self.op.instance_name
9304 self.instance = self.cfg.GetInstanceInfo(instance_name)
9305 assert self.instance is not None, \
9306 "Cannot retrieve locked instance %s" % self.op.instance_name
9307 _CheckNodeOnline(self, self.instance.primary_node)
9309 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9310 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9311 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9312 assert self.dst_node is not None
9314 _CheckNodeOnline(self, self.dst_node.name)
9315 _CheckNodeNotDrained(self, self.dst_node.name)
9318 self.dest_disk_info = None
9319 self.dest_x509_ca = None
9321 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9322 self.dst_node = None
9324 if len(self.op.target_node) != len(self.instance.disks):
9325 raise errors.OpPrereqError(("Received destination information for %s"
9326 " disks, but instance %s has %s disks") %
9327 (len(self.op.target_node), instance_name,
9328 len(self.instance.disks)),
9331 cds = _GetClusterDomainSecret()
9333 # Check X509 key name
9335 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9336 except (TypeError, ValueError), err:
9337 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9339 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9340 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9343 # Load and verify CA
9345 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9346 except OpenSSL.crypto.Error, err:
9347 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9348 (err, ), errors.ECODE_INVAL)
9350 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9351 if errcode is not None:
9352 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9353 (msg, ), errors.ECODE_INVAL)
9355 self.dest_x509_ca = cert
9357 # Verify target information
9359 for idx, disk_data in enumerate(self.op.target_node):
9361 (host, port, magic) = \
9362 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9363 except errors.GenericError, err:
9364 raise errors.OpPrereqError("Target info for disk %s: %s" %
9365 (idx, err), errors.ECODE_INVAL)
9367 disk_info.append((host, port, magic))
9369 assert len(disk_info) == len(self.op.target_node)
9370 self.dest_disk_info = disk_info
9373 raise errors.ProgrammerError("Unhandled export mode %r" %
9376 # instance disk type verification
9377 # TODO: Implement export support for file-based disks
9378 for disk in self.instance.disks:
9379 if disk.dev_type == constants.LD_FILE:
9380 raise errors.OpPrereqError("Export not supported for instances with"
9381 " file-based disks", errors.ECODE_INVAL)
9383 def _CleanupExports(self, feedback_fn):
9384 """Removes exports of current instance from all other nodes.
9386 If an instance in a cluster with nodes A..D was exported to node C, its
9387 exports will be removed from the nodes A, B and D.
9390 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9392 nodelist = self.cfg.GetNodeList()
9393 nodelist.remove(self.dst_node.name)
9395 # on one-node clusters nodelist will be empty after the removal
9396 # if we proceed, the backup would be removed because OpQueryExports
9397 # substitutes an empty list with the full cluster node list.
9398 iname = self.instance.name
9400 feedback_fn("Removing old exports for instance %s" % iname)
9401 exportlist = self.rpc.call_export_list(nodelist)
9402 for node in exportlist:
9403 if exportlist[node].fail_msg:
9405 if iname in exportlist[node].payload:
9406 msg = self.rpc.call_export_remove(node, iname).fail_msg
9408 self.LogWarning("Could not remove older export for instance %s"
9409 " on node %s: %s", iname, node, msg)
9411 def Exec(self, feedback_fn):
9412 """Export an instance to an image in the cluster.
9415 assert self.op.mode in constants.EXPORT_MODES
9417 instance = self.instance
9418 src_node = instance.primary_node
9420 if self.op.shutdown:
9421 # shutdown the instance, but not the disks
9422 feedback_fn("Shutting down instance %s" % instance.name)
9423 result = self.rpc.call_instance_shutdown(src_node, instance,
9424 self.op.shutdown_timeout)
9425 # TODO: Maybe ignore failures if ignore_remove_failures is set
9426 result.Raise("Could not shutdown instance %s on"
9427 " node %s" % (instance.name, src_node))
9429 # set the disks ID correctly since call_instance_start needs the
9430 # correct drbd minor to create the symlinks
9431 for disk in instance.disks:
9432 self.cfg.SetDiskID(disk, src_node)
9434 activate_disks = (not instance.admin_up)
9436 if activate_disks:
9437 # Activate the instance disks if we're exporting a stopped instance
9438 feedback_fn("Activating disks for %s" % instance.name)
9439 _StartInstanceDisks(self, instance, None)
9441 try:
9442 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9443 instance)
9445 helper.CreateSnapshots()
9446 try:
9447 if (self.op.shutdown and instance.admin_up and
9448 not self.op.remove_instance):
9449 assert not activate_disks
9450 feedback_fn("Starting instance %s" % instance.name)
9451 result = self.rpc.call_instance_start(src_node, instance, None, None)
9452 msg = result.fail_msg
9453 if msg:
9454 feedback_fn("Failed to start instance: %s" % msg)
9455 _ShutdownInstanceDisks(self, instance)
9456 raise errors.OpExecError("Could not start instance: %s" % msg)
9458 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9459 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9460 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9461 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9462 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9464 (key_name, _, _) = self.x509_key_name
9466 dest_ca_pem = \
9467 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9468 self.dest_x509_ca)
9470 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9471 key_name, dest_ca_pem,
9472 timeouts)
9473 finally:
9474 helper.Cleanup()
9476 # Check for backwards compatibility
9477 assert len(dresults) == len(instance.disks)
9478 assert compat.all(isinstance(i, bool) for i in dresults), \
9479 "Not all results are boolean: %r" % dresults
9481 finally:
9482 if activate_disks:
9483 feedback_fn("Deactivating disks for %s" % instance.name)
9484 _ShutdownInstanceDisks(self, instance)
9486 # Remove instance if requested
9487 if self.op.remove_instance:
9488 if not (compat.all(dresults) and fin_resu):
9489 feedback_fn("Not removing instance %s as parts of the export failed" %
9490 instance.name)
9491 else:
9492 feedback_fn("Removing instance %s" % instance.name)
9493 _RemoveInstance(self, feedback_fn, instance,
9494 self.op.ignore_remove_failures)
9496 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9497 self._CleanupExports(feedback_fn)
9499 return fin_resu, dresults
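
# Illustrative sketch (not part of the original LUs): how the remote-export
# destination information is checked in LUExportInstance.CheckPrereq above.
# The opcode carries a signed X509 key name and one opaque blob per disk that
# CheckRemoteExportDiskInfo turns into a (host, port, magic) tuple. The helper
# name and its standalone form are hypothetical; it only mirrors the calls
# already used above.
def _ExampleVerifyRemoteExportInfo(cds, x509_key_name, disk_data_list):
  """Hypothetical example mirroring the remote-export checks.

  @param cds: the cluster domain secret (see _GetClusterDomainSecret)
  @param x509_key_name: a (key_name, hmac_digest, hmac_salt) tuple
  @param disk_data_list: per-disk destination blobs from the opcode

  """
  (key_name, hmac_digest, hmac_salt) = x509_key_name
  # the key name must be authenticated with the cluster domain secret
  if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
    raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                               errors.ECODE_INVAL)
  # each disk blob decodes to the (host, port, magic) connection endpoint
  disk_info = []
  for idx, disk_data in enumerate(disk_data_list):
    (host, port, magic) = \
      masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
    disk_info.append((host, port, magic))
  return disk_info
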
9502 class LURemoveExport(NoHooksLU):
9503 """Remove exports related to the named instance.
9506 _OP_REQP = ["instance_name"]
9509 def ExpandNames(self):
9510 self.needed_locks = {}
9511 # We need all nodes to be locked in order for RemoveExport to work, but we
9512 # don't need to lock the instance itself, as nothing will happen to it (and
9513 # we can remove exports also for a removed instance)
9514 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9516 def CheckPrereq(self):
9517 """Check prerequisites.
9521 def Exec(self, feedback_fn):
9522 """Remove any export.
9525 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9526 # If the instance was not found we'll try with the name that was passed in.
9527 # This will only work if it was an FQDN, though.
9528 fqdn_warn = False
9529 if not instance_name:
9530 fqdn_warn = True
9531 instance_name = self.op.instance_name
9533 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9534 exportlist = self.rpc.call_export_list(locked_nodes)
9535 found = False
9536 for node in exportlist:
9537 msg = exportlist[node].fail_msg
9538 if msg:
9539 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9540 continue
9541 if instance_name in exportlist[node].payload:
9542 found = True
9543 result = self.rpc.call_export_remove(node, instance_name)
9544 msg = result.fail_msg
9545 if msg:
9546 logging.error("Could not remove export for instance %s"
9547 " on node %s: %s", instance_name, node, msg)
9549 if fqdn_warn and not found:
9550 feedback_fn("Export not found. If trying to remove an export belonging"
9551 " to a deleted instance please use its Fully Qualified"
9555 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9556 """Generic tags LU.
9558 This is an abstract class which is the parent of all the other tags LUs.
9562 def ExpandNames(self):
9563 self.needed_locks = {}
9564 if self.op.kind == constants.TAG_NODE:
9565 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9566 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9567 elif self.op.kind == constants.TAG_INSTANCE:
9568 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9569 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9571 def CheckPrereq(self):
9572 """Check prerequisites.
9575 if self.op.kind == constants.TAG_CLUSTER:
9576 self.target = self.cfg.GetClusterInfo()
9577 elif self.op.kind == constants.TAG_NODE:
9578 self.target = self.cfg.GetNodeInfo(self.op.name)
9579 elif self.op.kind == constants.TAG_INSTANCE:
9580 self.target = self.cfg.GetInstanceInfo(self.op.name)
9581 else:
9582 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9583 str(self.op.kind), errors.ECODE_INVAL)
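
# Note (illustrative): TagsLU centralizes the kind/name handling so that the
# concrete tag LUs below mostly differ only in their Exec methods. Based on
# the code above:
#
#   kind == TAG_CLUSTER  -> target is the cluster config object, no locks
#   kind == TAG_NODE     -> name expanded, LEVEL_NODE lock on that node only
#   kind == TAG_INSTANCE -> name expanded, LEVEL_INSTANCE lock on that instance
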
9586 class LUGetTags(TagsLU):
9587 """Returns the tags of a given object.
9590 _OP_REQP = ["kind", "name"]
9593 def Exec(self, feedback_fn):
9594 """Returns the tag list.
9597 return list(self.target.GetTags())
9600 class LUSearchTags(NoHooksLU):
9601 """Searches the tags for a given pattern.
9604 _OP_REQP = ["pattern"]
9607 def ExpandNames(self):
9608 self.needed_locks = {}
9610 def CheckPrereq(self):
9611 """Check prerequisites.
9613 This checks the pattern passed for validity by compiling it.
9616 try:
9617 self.re = re.compile(self.op.pattern)
9618 except re.error, err:
9619 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9620 (self.op.pattern, err), errors.ECODE_INVAL)
9622 def Exec(self, feedback_fn):
9623 """Returns the tag list.
9626 cfg = self.cfg
9627 tgts = [("/cluster", cfg.GetClusterInfo())]
9628 ilist = cfg.GetAllInstancesInfo().values()
9629 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9630 nlist = cfg.GetAllNodesInfo().values()
9631 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9632 results = []
9633 for path, target in tgts:
9634 for tag in target.GetTags():
9635 if self.re.search(tag):
9636 results.append((path, tag))
9637 return results
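
# Illustrative sketch (hypothetical helper, not used by the LUs): the tag
# search above walks (path, object) pairs and collects every tag matching a
# compiled regular expression. The same pattern, reduced to plain data:
def _ExampleSearchTags(pattern, tagged_objects):
  """Return [(path, tag)] for every tag matching the given pattern.

  @param pattern: an uncompiled regular expression string
  @param tagged_objects: iterable of (path, tag iterable) pairs

  """
  regex = re.compile(pattern)
  results = []
  for path, tags in tagged_objects:
    for tag in tags:
      if regex.search(tag):
        results.append((path, tag))
  return results
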
9640 class LUAddTags(TagsLU):
9641 """Sets a tag on a given object.
9644 _OP_REQP = ["kind", "name", "tags"]
9647 def CheckPrereq(self):
9648 """Check prerequisites.
9650 This checks the type and length of the tag name and value.
9653 TagsLU.CheckPrereq(self)
9654 for tag in self.op.tags:
9655 objects.TaggableObject.ValidateTag(tag)
9657 def Exec(self, feedback_fn):
9661 try:
9662 for tag in self.op.tags:
9663 self.target.AddTag(tag)
9664 except errors.TagError, err:
9665 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9666 self.cfg.Update(self.target, feedback_fn)
9669 class LUDelTags(TagsLU):
9670 """Delete a list of tags from a given object.
9673 _OP_REQP = ["kind", "name", "tags"]
9676 def CheckPrereq(self):
9677 """Check prerequisites.
9679 This checks that we have the given tag.
9682 TagsLU.CheckPrereq(self)
9683 for tag in self.op.tags:
9684 objects.TaggableObject.ValidateTag(tag)
9685 del_tags = frozenset(self.op.tags)
9686 cur_tags = self.target.GetTags()
9687 if not del_tags <= cur_tags:
9688 diff_tags = del_tags - cur_tags
9689 diff_names = ["'%s'" % tag for tag in diff_tags]
9691 raise errors.OpPrereqError("Tag(s) %s not found" %
9692 (",".join(diff_names)), errors.ECODE_NOENT)
9694 def Exec(self, feedback_fn):
9695 """Remove the tag from the object.
9698 for tag in self.op.tags:
9699 self.target.RemoveTag(tag)
9700 self.cfg.Update(self.target, feedback_fn)
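
# The prerequisite check in LUDelTags above relies on plain set algebra; as a
# standalone illustration with hypothetical tag names:
#
#   del_tags = frozenset(["web", "db"])
#   cur_tags = set(["web", "backup"])
#   del_tags <= cur_tags    # False: "db" is not present on the object
#   del_tags - cur_tags     # frozenset(["db"]) -> reported in the error
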
9703 class LUTestDelay(NoHooksLU):
9704 """Sleep for a specified amount of time.
9706 This LU sleeps on the master and/or nodes for a specified amount of
9707 time.
9710 _OP_REQP = ["duration", "on_master", "on_nodes"]
9713 def CheckArguments(self):
9714 # TODO: convert to the type system
9715 self.op.repeat = getattr(self.op, "repeat", 0)
9716 if self.op.repeat < 0:
9717 raise errors.OpPrereqError("Repetition count cannot be negative")
9719 def ExpandNames(self):
9720 """Expand names and set required locks.
9722 This expands the node list, if any.
9725 self.needed_locks = {}
9726 if self.op.on_nodes:
9727 # _GetWantedNodes can be used here, but is not always appropriate to use
9728 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9729 # more information.
9730 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9731 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9733 def CheckPrereq(self):
9734 """Check prerequisites.
9738 def _TestDelay(self):
9739 """Do the actual sleep.
9742 if self.op.on_master:
9743 if not utils.TestDelay(self.op.duration):
9744 raise errors.OpExecError("Error during master delay test")
9745 if self.op.on_nodes:
9746 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9747 for node, node_result in result.items():
9748 node_result.Raise("Failure during rpc call to node %s" % node)
9750 def Exec(self, feedback_fn):
9751 """Execute the test delay opcode, with the wanted repetitions.
9754 if self.op.repeat == 0:
9755 self._TestDelay()
9756 else:
9757 top_value = self.op.repeat - 1
9758 for i in range(self.op.repeat):
9759 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9760 self._TestDelay()
9763 class IAllocator(object):
9764 """IAllocator framework.
9766 An IAllocator instance has four sets of attributes:
9767 - cfg that is needed to query the cluster
9768 - input data (all members of the _KEYS class attribute are required)
9769 - four buffer attributes (in|out_data|text), that represent the
9770 input (to the external script) in text and data structure format,
9771 and the output from it, again in two formats
9772 - the result variables from the script (success, info, result) for
9773 easy usage
9776 # pylint: disable-msg=R0902
9777 # lots of instance attributes
9779 "name", "mem_size", "disks", "disk_template",
9780 "os", "tags", "nics", "vcpus", "hypervisor",
9783 "name", "relocate_from",
9789 def __init__(self, cfg, rpc, mode, **kwargs):
9790 self.cfg = cfg
9791 self.rpc = rpc
9792 # init buffer variables
9793 self.in_text = self.out_text = self.in_data = self.out_data = None
9794 # init all input fields so that pylint is happy
9795 self.mode = mode
9796 self.mem_size = self.disks = self.disk_template = None
9797 self.os = self.tags = self.nics = self.vcpus = None
9798 self.hypervisor = None
9799 self.relocate_from = None
9800 self.name = None
9801 self.evac_nodes = None
9803 self.required_nodes = None
9804 # init result fields
9805 self.success = self.info = self.result = None
9806 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9807 keyset = self._ALLO_KEYS
9808 fn = self._AddNewInstance
9809 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9810 keyset = self._RELO_KEYS
9811 fn = self._AddRelocateInstance
9812 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9813 keyset = self._EVAC_KEYS
9814 fn = self._AddEvacuateNodes
9815 else:
9816 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9817 " IAllocator" % self.mode)
9818 for key in kwargs:
9819 if key not in keyset:
9820 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9821 " IAllocator" % key)
9822 setattr(self, key, kwargs[key])
9824 for key in keyset:
9825 if key not in kwargs:
9826 raise errors.ProgrammerError("Missing input parameter '%s' to"
9827 " IAllocator" % key)
9828 self._BuildInputData(fn)
9830 def _ComputeClusterData(self):
9831 """Compute the generic allocator input data.
9833 This is the data that is independent of the actual operation.
9836 cfg = self.cfg
9837 cluster_info = cfg.GetClusterInfo()
9840 "version": constants.IALLOCATOR_VERSION,
9841 "cluster_name": cfg.GetClusterName(),
9842 "cluster_tags": list(cluster_info.GetTags()),
9843 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9844 # we don't have job IDs
9845 }
9846 iinfo = cfg.GetAllInstancesInfo().values()
9847 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9849 # node data
9850 node_results = {}
9851 node_list = cfg.GetNodeList()
9853 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9854 hypervisor_name = self.hypervisor
9855 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9856 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9857 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9858 hypervisor_name = cluster_info.enabled_hypervisors[0]
9860 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9861 hypervisor_name)
9862 node_iinfo = \
9863 self.rpc.call_all_instances_info(node_list,
9864 cluster_info.enabled_hypervisors)
9865 for nname, nresult in node_data.items():
9866 # first fill in static (config-based) values
9867 ninfo = cfg.GetNodeInfo(nname)
9869 "tags": list(ninfo.GetTags()),
9870 "primary_ip": ninfo.primary_ip,
9871 "secondary_ip": ninfo.secondary_ip,
9872 "offline": ninfo.offline,
9873 "drained": ninfo.drained,
9874 "master_candidate": ninfo.master_candidate,
9877 if not (ninfo.offline or ninfo.drained):
9878 nresult.Raise("Can't get data for node %s" % nname)
9879 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9880 nname)
9881 remote_info = nresult.payload
9883 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9884 'vg_size', 'vg_free', 'cpu_total']:
9885 if attr not in remote_info:
9886 raise errors.OpExecError("Node '%s' didn't return attribute"
9887 " '%s'" % (nname, attr))
9888 if not isinstance(remote_info[attr], int):
9889 raise errors.OpExecError("Node '%s' returned invalid value"
9890 " for '%s': %s" %
9891 (nname, attr, remote_info[attr]))
9892 # compute memory used by primary instances
9893 i_p_mem = i_p_up_mem = 0
9894 for iinfo, beinfo in i_list:
9895 if iinfo.primary_node == nname:
9896 i_p_mem += beinfo[constants.BE_MEMORY]
9897 if iinfo.name not in node_iinfo[nname].payload:
9898 i_used_mem = 0
9899 else:
9900 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9901 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9902 remote_info['memory_free'] -= max(0, i_mem_diff)
9904 if iinfo.admin_up:
9905 i_p_up_mem += beinfo[constants.BE_MEMORY]
9907 # compute memory used by instances
9909 "total_memory": remote_info['memory_total'],
9910 "reserved_memory": remote_info['memory_dom0'],
9911 "free_memory": remote_info['memory_free'],
9912 "total_disk": remote_info['vg_size'],
9913 "free_disk": remote_info['vg_free'],
9914 "total_cpus": remote_info['cpu_total'],
9915 "i_pri_memory": i_p_mem,
9916 "i_pri_up_memory": i_p_up_mem,
9920 node_results[nname] = pnr
9921 data["nodes"] = node_results
9923 # instance data
9924 instance_data = {}
9925 for iinfo, beinfo in i_list:
9926 nic_data = []
9927 for nic in iinfo.nics:
9928 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9929 nic_dict = {"mac": nic.mac,
9931 "mode": filled_params[constants.NIC_MODE],
9932 "link": filled_params[constants.NIC_LINK],
9934 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9935 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9936 nic_data.append(nic_dict)
9938 "tags": list(iinfo.GetTags()),
9939 "admin_up": iinfo.admin_up,
9940 "vcpus": beinfo[constants.BE_VCPUS],
9941 "memory": beinfo[constants.BE_MEMORY],
9943 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9945 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9946 "disk_template": iinfo.disk_template,
9947 "hypervisor": iinfo.hypervisor,
9949 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9950 pir["disks"])
9951 instance_data[iinfo.name] = pir
9953 data["instances"] = instance_data
9955 self.in_data = data
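
  # Illustrative sketch (field values hypothetical): the dictionary assembled
  # above and stored in self.in_data looks roughly like
  #
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [...],
  #    "enabled_hypervisors": [...],
  #    "nodes": {"node1.example.com": {"total_memory": 4096,
  #                                    "free_memory": 1024, ...}, ...},
  #    "instances": {"inst1.example.com": {"memory": 512, "vcpus": 1,
  #                                        "disk_template": "drbd", ...}, ...}}
  #
  # The mode-specific "request" key is added later by _BuildInputData.
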
9957 def _AddNewInstance(self):
9958 """Add new instance data to allocator structure.
9960 This in combination with _ComputeClusterData will create the
9961 correct structure needed as input for the allocator.
9963 The checks for the completeness of the opcode must have already been
9964 done.
9967 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9969 if self.disk_template in constants.DTS_NET_MIRROR:
9970 self.required_nodes = 2
9971 else:
9972 self.required_nodes = 1
9975 "disk_template": self.disk_template,
9978 "vcpus": self.vcpus,
9979 "memory": self.mem_size,
9980 "disks": self.disks,
9981 "disk_space_total": disk_space,
9983 "required_nodes": self.required_nodes,
9987 def _AddRelocateInstance(self):
9988 """Add relocate instance data to allocator structure.
9990 This in combination with _ComputeClusterData will create the
9991 correct structure needed as input for the allocator.
9993 The checks for the completeness of the opcode must have already been
9994 done.
9997 instance = self.cfg.GetInstanceInfo(self.name)
9998 if instance is None:
9999 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10000 " IAllocator" % self.name)
10002 if instance.disk_template not in constants.DTS_NET_MIRROR:
10003 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10004 errors.ECODE_INVAL)
10006 if len(instance.secondary_nodes) != 1:
10007 raise errors.OpPrereqError("Instance does not have exactly one"
10008 " secondary node", errors.ECODE_STATE)
10010 self.required_nodes = 1
10011 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10012 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10016 "disk_space_total": disk_space,
10017 "required_nodes": self.required_nodes,
10018 "relocate_from": self.relocate_from,
10022 def _AddEvacuateNodes(self):
10023 """Add evacuate nodes data to allocator structure.
10027 "evac_nodes": self.evac_nodes
10031 def _BuildInputData(self, fn):
10032 """Build input data structures.
10035 self._ComputeClusterData()
10037 request = fn()
10038 request["type"] = self.mode
10039 self.in_data["request"] = request
10041 self.in_text = serializer.Dump(self.in_data)
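
  # Illustrative sketch (values hypothetical): after _BuildInputData runs,
  # self.in_text is the serialized cluster data with the mode-specific request
  # merged in, e.g. for an allocation roughly
  #
  #   {..., "request": {"type": <IALLOCATOR_MODE_ALLOC>,
  #                     "name": "inst1.example.com",
  #                     "memory": 512, "disks": [{"size": 1024, "mode": "w"}],
  #                     "required_nodes": 2, ...}}
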
10043 def Run(self, name, validate=True, call_fn=None):
10044 """Run an instance allocator and return the results.
10047 if call_fn is None:
10048 call_fn = self.rpc.call_iallocator_runner
10050 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10051 result.Raise("Failure while running the iallocator script")
10053 self.out_text = result.payload
10054 if validate:
10055 self._ValidateResult()
10057 def _ValidateResult(self):
10058 """Process the allocator results.
10060 This will process and if successful save the result in
10061 self.out_data and the other parameters.
10064 try:
10065 rdict = serializer.Load(self.out_text)
10066 except Exception, err:
10067 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10069 if not isinstance(rdict, dict):
10070 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10072 # TODO: remove backwards compatibility in later versions
10073 if "nodes" in rdict and "result" not in rdict:
10074 rdict["result"] = rdict["nodes"]
10075 del rdict["nodes"]
10077 for key in "success", "info", "result":
10078 if key not in rdict:
10079 raise errors.OpExecError("Can't parse iallocator results:"
10080 " missing key '%s'" % key)
10081 setattr(self, key, rdict[key])
10083 if not isinstance(rdict["result"], list):
10084 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10085 " is not a list")
10086 self.out_data = rdict
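
# Illustrative sketch (hypothetical helper, not used anywhere): a well-formed
# iallocator reply, as expected by IAllocator._ValidateResult above, is a
# serialized dictionary carrying the mandatory "success", "info" and "result"
# keys; legacy scripts may return the node list under "nodes" instead.
def _ExampleIAllocatorReply(chosen_nodes):
  """Build a reply document in the format _ValidateResult accepts.

  """
  return serializer.Dump({
    "success": True,
    "info": "allocation successful",
    "result": list(chosen_nodes),
    })
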
10089 class LUTestAllocator(NoHooksLU):
10090 """Run allocator tests.
10092 This LU runs the allocator tests
10095 _OP_REQP = ["direction", "mode", "name"]
10097 ("hypervisor", None),
10098 ("allocator", None),
10101 def CheckPrereq(self):
10102 """Check prerequisites.
10104 This checks the opcode parameters depending on the direction and mode test.
10107 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10108 for attr in ["name", "mem_size", "disks", "disk_template",
10109 "os", "tags", "nics", "vcpus"]:
10110 if not hasattr(self.op, attr):
10111 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10112 attr, errors.ECODE_INVAL)
10113 iname = self.cfg.ExpandInstanceName(self.op.name)
10114 if iname is not None:
10115 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10116 iname, errors.ECODE_EXISTS)
10117 if not isinstance(self.op.nics, list):
10118 raise errors.OpPrereqError("Invalid parameter 'nics'",
10119 errors.ECODE_INVAL)
10120 for row in self.op.nics:
10121 if (not isinstance(row, dict) or
10122 "mac" not in row or
10124 "bridge" not in row):
10125 raise errors.OpPrereqError("Invalid contents of the 'nics'"
10126 " parameter", errors.ECODE_INVAL)
10127 if not isinstance(self.op.disks, list):
10128 raise errors.OpPrereqError("Invalid parameter 'disks'",
10129 errors.ECODE_INVAL)
10130 for row in self.op.disks:
10131 if (not isinstance(row, dict) or
10132 "size" not in row or
10133 not isinstance(row["size"], int) or
10134 "mode" not in row or
10135 row["mode"] not in ['r', 'w']):
10136 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10137 " parameter", errors.ECODE_INVAL)
10138 if self.op.hypervisor is None:
10139 self.op.hypervisor = self.cfg.GetHypervisorType()
10140 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10141 if not hasattr(self.op, "name"):
10142 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
10143 errors.ECODE_INVAL)
10144 fname = _ExpandInstanceName(self.cfg, self.op.name)
10145 self.op.name = fname
10146 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10147 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10148 if not hasattr(self.op, "evac_nodes"):
10149 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10150 " opcode input", errors.ECODE_INVAL)
10151 else:
10152 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10153 self.op.mode, errors.ECODE_INVAL)
10155 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10156 if self.op.allocator is None:
10157 raise errors.OpPrereqError("Missing allocator name",
10158 errors.ECODE_INVAL)
10159 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10160 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10161 self.op.direction, errors.ECODE_INVAL)
10163 def Exec(self, feedback_fn):
10164 """Run the allocator test.
10167 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10168 ial = IAllocator(self.cfg, self.rpc,
10169 mode=self.op.mode,
10170 name=self.op.name,
10171 mem_size=self.op.mem_size,
10172 disks=self.op.disks,
10173 disk_template=self.op.disk_template,
10174 os=self.op.os,
10175 tags=self.op.tags,
10176 nics=self.op.nics,
10177 vcpus=self.op.vcpus,
10178 hypervisor=self.op.hypervisor,
10179 )
10180 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10181 ial = IAllocator(self.cfg, self.rpc,
10182 mode=self.op.mode,
10183 name=self.op.name,
10184 relocate_from=list(self.relocate_from),
10185 )
10186 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10187 ial = IAllocator(self.cfg, self.rpc,
10188 mode=self.op.mode,
10189 evac_nodes=self.op.evac_nodes)
10190 else:
10191 raise errors.ProgrammerError("Uncaught mode %s in"
10192 " LUTestAllocator.Exec", self.op.mode)
10194 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10195 result = ial.in_text
10196 else:
10197 ial.Run(self.op.allocator, validate=False)
10198 result = ial.out_text
10199 return result
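
# Note (illustrative): with direction IALLOCATOR_DIR_IN the LU above only
# returns the generated allocator input text, which is useful for inspecting
# what an external script would receive; otherwise it runs the named allocator
# with validate=False, so even a malformed reply is returned verbatim as the
# opcode result.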