# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

# Standard-library modules used by the code below; OpenSSL is needed for
# certificate verification in _VerifyCertificate
import logging
import copy
import itertools

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import ht

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))

#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     ht.TPositiveInt)

#: the force parameter
_PForce = ("force", False, ht.TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)

#: whether to ignore offline nodes
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)

#: a required node name (for single-node LUs)
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)

#: the migration type (live/non-live)
_PMigrationMode = ("mode", None,
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))

#: the obsolete 'live' mode (boolean)
_PMigrationLive = ("live", None, ht.TMaybeBool)
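
# Example (illustrative sketch only; LUExampleStartup is hypothetical and not
# defined in this module): an LU composes its parameter list from the common
# attributes above, each entry being a (name, default, type-check) tuple:
#
#   class LUExampleStartup(LogicalUnit):
#     _OP_PARAMS = [
#       _PInstanceName,
#       _PForce,
#       _PIgnoreOfflineNodes,
#       ("startup_paused", False, ht.TBool),
#       ]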


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
      they should get if not already defined, and the types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == ht.NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == ht.NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()
183 """Returns the SshRunner object
187 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
190 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this
    will be handled in the hooks runner. Also note that additional keys
    will be added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    As for the node lists, an empty list (and not None) should be
    returned when there are no nodes to report.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the 'unused argument' and 'could
    # be a function' warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
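
  # Example (illustrative sketch only; not an actual LU in this module): the
  # usual pattern combining the two helpers above in a hypothetical
  # instance-level LU is:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()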


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
445 """Tasklet base class.
447 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
448 they can mix legacy code with tasklets. Locking needs to be done in the LU,
449 tasklets know nothing about locks.
451 Subclasses must follow these rules:
452 - Implement CheckPrereq
456 def __init__(self, lu):
463 def CheckPrereq(self):
464 """Check prerequisites for this tasklets.
466 This method should check whether the prerequisites for the execution of
467 this tasklet are fulfilled. It can do internode communication, but it
468 should be idempotent - no cluster or system changes are allowed.
470 The method should raise errors.OpPrereqError in case something is not
471 fulfilled. Its return value is ignored.
473 This method should also update all parameters to their canonical form if it
474 hasn't been done before.
479 def Exec(self, feedback_fn):
480 """Execute the tasklet.
482 This method should implement the actual work. It should raise
483 errors.OpExecError for failures that are somewhat dealt with in code, or
487 raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)
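
# Example (illustrative; names are made up): short names come back expanded
# and sorted:
#
#   _GetWantedNodes(lu, ["node2", "node1"])
#   =>  ["node1.example.com", "node2.example.com"]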


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
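
# Example (illustrative): with use_default=True, passing
# constants.VALUE_DEFAULT deletes the key, so the cluster-level default
# applies again:
#
#   _GetUpdatedParams({"mem": 128, "vcpus": 2},
#                     {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
#   =>  {"vcpus": 4}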


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the list of fields to check

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
  return True


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
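
# Example (illustrative): LUs normally canonicalize user-supplied names early,
# e.g. in ExpandNames or CheckPrereq:
#
#   self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)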


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
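
# Example (illustrative; new_os is a hypothetical variable): callers can
# override selected keys, e.g. to report a different OS in the hook
# environment:
#
#   env = _BuildInstanceHookEnvByObject(self, instance,
#                                       override={"os_type": new_os})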


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
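
# Example (illustrative arithmetic): with candidate_pool_size = 10, four
# current candidates and a desired count of 5, adding this node gives
# mc_should = min(5 + 1, 10) = 6; since 4 < 6 the node should promote itself.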


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
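
# Example (illustrative): instances with "node1.example.com" as their primary
# node:
#
#   _GetNodePrimaryInstances(self.cfg, "node1.example.com")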


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", ht.EmptyList,
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, ht.TBool),
    ("error_codes", False, ht.TBool),
    ("debug_simulate_errors", False, ht.TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance"
                      " failovers should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
1871 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1872 """Verifies and computes a node information map
1874 @type ninfo: L{objects.Node}
1875 @param ninfo: the node to check
1876 @param nresult: the remote results for the node
1877 @param nimg: the node image object
1878 @param vg_name: the configured VG name
1882 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1884 # try to read free memory (from the hypervisor)
1885 hv_info = nresult.get(constants.NV_HVINFO, None)
1886 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1887 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1890 nimg.mfree = int(hv_info["memory_free"])
1891 except (ValueError, TypeError):
1892 _ErrorIf(True, self.ENODERPC, node,
1893 "node returned invalid nodeinfo, check hypervisor")
1895 # FIXME: devise a free space model for file based instances as well
1896 if vg_name is not None:
1897 test = (constants.NV_VGLIST not in nresult or
1898 vg_name not in nresult[constants.NV_VGLIST])
1899 _ErrorIf(test, self.ENODELVM, node,
1900 "node didn't return data for the volume group '%s'"
1901 " - it is either missing or broken", vg_name)
1904 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1905 except (ValueError, TypeError):
1906 _ErrorIf(True, self.ENODERPC, node,
1907 "node returned invalid LVM info, check LVM status")
1909 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1910 """Gets per-disk status information for all instances.
1912 @type nodelist: list of strings
1913 @param nodelist: Node names
1914 @type node_image: dict of (name, L{objects.Node})
1915 @param node_image: Node objects
1916 @type instanceinfo: dict of (name, L{objects.Instance})
1917 @param instanceinfo: Instance objects
1918 @rtype: {instance: {node: [(success, payload)]}}
1919 @return: a dictionary of per-instance dictionaries with nodes as
1920 keys and disk information as values; the disk information is a
1921 list of tuples (success, payload)
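A hypothetical example of the returned structure (the instance and node
names are illustrative only), eg::

  {"inst1": {"node1": [(True, status0), (False, "node offline")]}}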
1924 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1927 node_disks_devonly = {}
1928 diskless_instances = set()
1929 diskless = constants.DT_DISKLESS
1931 for nname in nodelist:
1932 node_instances = list(itertools.chain(node_image[nname].pinst,
1933 node_image[nname].sinst))
1934 diskless_instances.update(inst for inst in node_instances
1935 if instanceinfo[inst].disk_template == diskless)
1936 disks = [(inst, disk)
1937 for inst in node_instances
1938 for disk in instanceinfo[inst].disks]
1941 # No need to collect data
1944 node_disks[nname] = disks
1946 # Creating copies as SetDiskID below will modify the objects and that can
1947 # lead to incorrect data returned from nodes
1948 devonly = [dev.Copy() for (_, dev) in disks]
1951 self.cfg.SetDiskID(dev, nname)
1953 node_disks_devonly[nname] = devonly
1955 assert len(node_disks) == len(node_disks_devonly)
1957 # Collect data from all nodes with disks
1958 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
1961 assert len(result) == len(node_disks)
1965 for (nname, nres) in result.items():
1966 disks = node_disks[nname]
1969 # No data from this node
1970 data = len(disks) * [(False, "node offline")]
1973 _ErrorIf(msg, self.ENODERPC, nname,
1974 "while getting disk information: %s", msg)
1976 # No data from this node
1977 data = len(disks) * [(False, msg)]
1980 for idx, i in enumerate(nres.payload):
1981 if isinstance(i, (tuple, list)) and len(i) == 2:
1984 logging.warning("Invalid result from node %s, entry %d: %s",
1986 data.append((False, "Invalid result from the remote node"))
1988 for ((inst, _), status) in zip(disks, data):
1989 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
1991 # Add empty entries for diskless instances.
1992 for inst in diskless_instances:
1993 assert inst not in instdisk
1996 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
1997 len(nnames) <= len(instanceinfo[inst].all_nodes) and
1998 compat.all(isinstance(s, (tuple, list)) and
1999 len(s) == 2 for s in statuses)
2000 for inst, nnames in instdisk.items()
2001 for nname, statuses in nnames.items())
2002 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2006 def BuildHooksEnv(self):
2009 Cluster-Verify hooks are run in the post phase only; when they fail,
2010 their output is logged in the verify output and the verification fails.
2013 all_nodes = self.cfg.GetNodeList()
2015 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2017 for node in self.cfg.GetAllNodesInfo().values():
2018 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2020 return env, [], all_nodes
2022 def Exec(self, feedback_fn):
2023 """Verify integrity of cluster, performing various test on nodes.
2027 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2028 verbose = self.op.verbose
2029 self._feedback_fn = feedback_fn
2030 feedback_fn("* Verifying global settings")
2031 for msg in self.cfg.VerifyConfig():
2032 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2034 # Check the cluster certificates
2035 for cert_filename in constants.ALL_CERT_FILES:
2036 (errcode, msg) = _VerifyCertificate(cert_filename)
2037 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2039 vg_name = self.cfg.GetVGName()
2040 drbd_helper = self.cfg.GetDRBDHelper()
2041 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2042 cluster = self.cfg.GetClusterInfo()
2043 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2044 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2045 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2046 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2047 for iname in instancelist)
2048 i_non_redundant = [] # Non redundant instances
2049 i_non_a_balanced = [] # Non auto-balanced instances
2050 n_offline = 0 # Count of offline nodes
2051 n_drained = 0 # Count of nodes being drained
2052 node_vol_should = {}
2054 # FIXME: verify OS list
2055 # do local checksums
2056 master_files = [constants.CLUSTER_CONF_FILE]
2057 master_node = self.master_node = self.cfg.GetMasterNode()
2058 master_ip = self.cfg.GetMasterIP()
2060 file_names = ssconf.SimpleStore().GetFileList()
2061 file_names.extend(constants.ALL_CERT_FILES)
2062 file_names.extend(master_files)
2063 if cluster.modify_etc_hosts:
2064 file_names.append(constants.ETC_HOSTS)
2066 local_checksums = utils.FingerprintFiles(file_names)
2068 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2069 node_verify_param = {
2070 constants.NV_FILELIST: file_names,
2071 constants.NV_NODELIST: [node.name for node in nodeinfo
2072 if not node.offline],
2073 constants.NV_HYPERVISOR: hypervisors,
2074 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2075 node.secondary_ip) for node in nodeinfo
2076 if not node.offline],
2077 constants.NV_INSTANCELIST: hypervisors,
2078 constants.NV_VERSION: None,
2079 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2080 constants.NV_NODESETUP: None,
2081 constants.NV_TIME: None,
2082 constants.NV_MASTERIP: (master_node, master_ip),
2083 constants.NV_OSLIST: None,
2084 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2087 if vg_name is not None:
2088 node_verify_param[constants.NV_VGLIST] = None
2089 node_verify_param[constants.NV_LVLIST] = vg_name
2090 node_verify_param[constants.NV_PVLIST] = [vg_name]
2091 node_verify_param[constants.NV_DRBDLIST] = None
2094 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2096 # Build our expected cluster state
2097 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2099 vm_capable=node.vm_capable))
2100 for node in nodeinfo)
2102 for instance in instancelist:
2103 inst_config = instanceinfo[instance]
2105 for nname in inst_config.all_nodes:
2106 if nname not in node_image:
2108 gnode = self.NodeImage(name=nname)
2110 node_image[nname] = gnode
2112 inst_config.MapLVsByNode(node_vol_should)
2114 pnode = inst_config.primary_node
2115 node_image[pnode].pinst.append(instance)
2117 for snode in inst_config.secondary_nodes:
2118 nimg = node_image[snode]
2119 nimg.sinst.append(instance)
2120 if pnode not in nimg.sbp:
2121 nimg.sbp[pnode] = []
2122 nimg.sbp[pnode].append(instance)
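# illustrative example (hypothetical names): after this loop,
# nimg.sbp == {"node1": ["inst1", "inst2"]}, i.e. the instances for which
# this node is secondary, grouped by their primary node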
2124 # At this point, we have the in-memory data structures complete,
2125 # except for the runtime information, which we'll gather next
2127 # Due to the way our RPC system works, exact response times cannot be
2128 # guaranteed (e.g. a broken node could run into a timeout). By recording the
2129 # time before and after executing the request, we can at least have a time window.
2131 nvinfo_starttime = time.time()
2132 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2133 self.cfg.GetClusterName())
2134 nvinfo_endtime = time.time()
2136 all_drbd_map = self.cfg.ComputeDRBDMap()
2138 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2139 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2141 feedback_fn("* Verifying node status")
2145 for node_i in nodeinfo:
2147 nimg = node_image[node]
2151 feedback_fn("* Skipping offline node %s" % (node,))
2155 if node == master_node:
2157 elif node_i.master_candidate:
2158 ntype = "master candidate"
2159 elif node_i.drained:
2165 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2167 msg = all_nvinfo[node].fail_msg
2168 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2170 nimg.rpc_fail = True
2173 nresult = all_nvinfo[node].payload
2175 nimg.call_ok = self._VerifyNode(node_i, nresult)
2176 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2177 self._VerifyNodeNetwork(node_i, nresult)
2178 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2182 self._VerifyNodeLVM(node_i, nresult, vg_name)
2183 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2186 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2187 self._UpdateNodeInstances(node_i, nresult, nimg)
2188 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2189 self._UpdateNodeOS(node_i, nresult, nimg)
2190 if not nimg.os_fail:
2191 if refos_img is None:
2193 self._VerifyNodeOS(node_i, nimg, refos_img)
2195 feedback_fn("* Verifying instance status")
2196 for instance in instancelist:
2198 feedback_fn("* Verifying instance %s" % instance)
2199 inst_config = instanceinfo[instance]
2200 self._VerifyInstance(instance, inst_config, node_image,
2202 inst_nodes_offline = []
2204 pnode = inst_config.primary_node
2205 pnode_img = node_image[pnode]
2206 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2207 self.ENODERPC, pnode, "instance %s, connection to"
2208 " primary node failed", instance)
2210 if pnode_img.offline:
2211 inst_nodes_offline.append(pnode)
2213 # If the instance is non-redundant we cannot survive losing its primary
2214 # node, so we are not N+1 compliant. On the other hand we have no disk
2215 templates with more than one secondary, so that situation is not well supported either.
2217 # FIXME: does not support file-backed instances
2218 if not inst_config.secondary_nodes:
2219 i_non_redundant.append(instance)
2220 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2221 instance, "instance has multiple secondary nodes: %s",
2222 utils.CommaJoin(inst_config.secondary_nodes),
2223 code=self.ETYPE_WARNING)
2225 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2226 i_non_a_balanced.append(instance)
2228 for snode in inst_config.secondary_nodes:
2229 s_img = node_image[snode]
2230 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2231 "instance %s, connection to secondary node failed", instance)
2234 inst_nodes_offline.append(snode)
2236 # warn that the instance lives on offline nodes
2237 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2238 "instance lives on offline node(s) %s",
2239 utils.CommaJoin(inst_nodes_offline))
2240 # ... or ghost/non-vm_capable nodes
2241 for node in inst_config.all_nodes:
2242 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2243 "instance lives on ghost node %s", node)
2244 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2245 instance, "instance lives on non-vm_capable node %s", node)
2247 feedback_fn("* Verifying orphan volumes")
2248 reserved = utils.FieldSet(*cluster.reserved_lvs)
2249 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2251 feedback_fn("* Verifying orphan instances")
2252 self._VerifyOrphanInstances(instancelist, node_image)
2254 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2255 feedback_fn("* Verifying N+1 Memory redundancy")
2256 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2258 feedback_fn("* Other Notes")
2260 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2261 % len(i_non_redundant))
2263 if i_non_a_balanced:
2264 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2265 % len(i_non_a_balanced))
2268 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2271 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2275 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2276 """Analyze the post-hooks' result
2278 This method analyses the hook result, handles it, and sends some
2279 nicely-formatted feedback back to the user.
2281 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2282 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2283 @param hooks_results: the results of the multi-node hooks rpc call
2284 @param feedback_fn: function used to send feedback back to the caller
2285 @param lu_result: previous Exec result
2286 @return: the new Exec result, based on the previous result
2290 # We only really run POST phase hooks, and are only interested in their results.
2292 if phase == constants.HOOKS_PHASE_POST:
2293 # Used to change hooks' output to proper indentation
2294 indent_re = re.compile('^', re.M)
2295 feedback_fn("* Hooks Results")
2296 assert hooks_results, "invalid result from hooks"
2298 for node_name in hooks_results:
2299 res = hooks_results[node_name]
2301 test = msg and not res.offline
2302 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2303 "Communication failure in hooks execution: %s", msg)
2304 if res.offline or msg:
2305 # No need to investigate payload if node is offline or gave an error.
2306 # manually override lu_result here, as _ErrorIf only
2307 # overrides self.bad
2310 for script, hkr, output in res.payload:
2311 test = hkr == constants.HKR_FAIL
2312 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2313 "Script %s failed, output:", script)
2315 output = indent_re.sub(' ', output)
2316 feedback_fn("%s" % output)
2322 class LUVerifyDisks(NoHooksLU):
2323 """Verifies the cluster disks status.
2328 def ExpandNames(self):
2329 self.needed_locks = {
2330 locking.LEVEL_NODE: locking.ALL_SET,
2331 locking.LEVEL_INSTANCE: locking.ALL_SET,
2333 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
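# note: a value of 1 requests shared (read) locks at every level, which
# is enough here since this LU only inspects state and never modifies it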
2335 def Exec(self, feedback_fn):
2336 """Verify integrity of cluster disks.
2338 @rtype: tuple of three items
2339 @return: a tuple of (dict of node-to-node_error, list of instances
2340 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2344 result = res_nodes, res_instances, res_missing = {}, [], {}
2346 vg_name = self.cfg.GetVGName()
2347 nodes = utils.NiceSort(self.cfg.GetNodeList())
2348 instances = [self.cfg.GetInstanceInfo(name)
2349 for name in self.cfg.GetInstanceList()]
2352 for inst in instances:
2354 if (not inst.admin_up or
2355 inst.disk_template not in constants.DTS_NET_MIRROR):
2357 inst.MapLVsByNode(inst_lvs)
2358 # transform { iname: {node: [vol,],},} to {(node, vol): inst}
2359 for node, vol_list in inst_lvs.iteritems():
2360 for vol in vol_list:
2361 nv_dict[(node, vol)] = inst
2366 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2370 node_res = node_lvs[node]
2371 if node_res.offline:
2373 msg = node_res.fail_msg
2375 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2376 res_nodes[node] = msg
2379 lvs = node_res.payload
2380 for lv_name, (_, _, lv_online) in lvs.items():
2381 inst = nv_dict.pop((node, lv_name), None)
2382 if (not lv_online and inst is not None
2383 and inst.name not in res_instances):
2384 res_instances.append(inst.name)
2386 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2388 for key, inst in nv_dict.iteritems():
2389 if inst.name not in res_missing:
2390 res_missing[inst.name] = []
2391 res_missing[inst.name].append(key)
2396 class LURepairDiskSizes(NoHooksLU):
2397 """Verifies the cluster disks sizes.
2400 _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2403 def ExpandNames(self):
2404 if self.op.instances:
2405 self.wanted_names = []
2406 for name in self.op.instances:
2407 full_name = _ExpandInstanceName(self.cfg, name)
2408 self.wanted_names.append(full_name)
2409 self.needed_locks = {
2410 locking.LEVEL_NODE: [],
2411 locking.LEVEL_INSTANCE: self.wanted_names,
2413 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2415 self.wanted_names = None
2416 self.needed_locks = {
2417 locking.LEVEL_NODE: locking.ALL_SET,
2418 locking.LEVEL_INSTANCE: locking.ALL_SET,
2420 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2422 def DeclareLocks(self, level):
2423 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2424 self._LockInstancesNodes(primary_only=True)
2426 def CheckPrereq(self):
2427 """Check prerequisites.
2429 This only checks the optional instance list against the existing names.
2432 if self.wanted_names is None:
2433 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2435 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2436 in self.wanted_names]
2438 def _EnsureChildSizes(self, disk):
2439 """Ensure children of the disk have the needed disk size.
2441 This is valid mainly for DRBD8 and fixes an issue where the
2442 children have a smaller disk size.
2444 @param disk: an L{ganeti.objects.Disk} object
2447 if disk.dev_type == constants.LD_DRBD8:
2448 assert disk.children, "Empty children for DRBD8?"
2449 fchild = disk.children[0]
2450 mismatch = fchild.size < disk.size
2452 self.LogInfo("Child disk has size %d, parent %d, fixing",
2453 fchild.size, disk.size)
2454 fchild.size = disk.size
2456 # and we recurse on this child only, not on the metadev
2457 return self._EnsureChildSizes(fchild) or mismatch
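# illustrative contract: the method returns True when any (grand)child
# size was fixed up, so the caller knows the configuration must be
# written back (see the cfg.Update call in Exec below)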
2461 def Exec(self, feedback_fn):
2462 """Verify the size of cluster disks.
2465 # TODO: check child disks too
2466 # TODO: check differences in size between primary/secondary nodes
2468 for instance in self.wanted_instances:
2469 pnode = instance.primary_node
2470 if pnode not in per_node_disks:
2471 per_node_disks[pnode] = []
2472 for idx, disk in enumerate(instance.disks):
2473 per_node_disks[pnode].append((instance, idx, disk))
2476 for node, dskl in per_node_disks.items():
2477 newl = [v[2].Copy() for v in dskl]
2479 self.cfg.SetDiskID(dsk, node)
2480 result = self.rpc.call_blockdev_getsizes(node, newl)
2482 self.LogWarning("Failure in blockdev_getsizes call to node"
2483 " %s, ignoring", node)
2485 if len(result.data) != len(dskl):
2486 self.LogWarning("Invalid result from node %s, ignoring node results",
2489 for ((instance, idx, disk), size) in zip(dskl, result.data):
2491 self.LogWarning("Disk %d of instance %s did not return size"
2492 " information, ignoring", idx, instance.name)
2494 if not isinstance(size, (int, long)):
2495 self.LogWarning("Disk %d of instance %s did not return valid"
2496 " size information, ignoring", idx, instance.name)
2499 if size != disk.size:
2500 self.LogInfo("Disk %d of instance %s has mismatched size,"
2501 " correcting: recorded %d, actual %d", idx,
2502 instance.name, disk.size, size)
2504 self.cfg.Update(instance, feedback_fn)
2505 changed.append((instance.name, idx, size))
2506 if self._EnsureChildSizes(disk):
2507 self.cfg.Update(instance, feedback_fn)
2508 changed.append((instance.name, idx, disk.size))
2512 class LURenameCluster(LogicalUnit):
2513 """Rename the cluster.
2516 HPATH = "cluster-rename"
2517 HTYPE = constants.HTYPE_CLUSTER
2518 _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2520 def BuildHooksEnv(self):
2525 "OP_TARGET": self.cfg.GetClusterName(),
2526 "NEW_NAME": self.op.name,
2528 mn = self.cfg.GetMasterNode()
2529 all_nodes = self.cfg.GetNodeList()
2530 return env, [mn], all_nodes
2532 def CheckPrereq(self):
2533 """Verify that the passed name is a valid one.
2536 hostname = netutils.GetHostname(name=self.op.name,
2537 family=self.cfg.GetPrimaryIPFamily())
2539 new_name = hostname.name
2540 self.ip = new_ip = hostname.ip
2541 old_name = self.cfg.GetClusterName()
2542 old_ip = self.cfg.GetMasterIP()
2543 if new_name == old_name and new_ip == old_ip:
2544 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2545 " cluster has changed",
2547 if new_ip != old_ip:
2548 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2549 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2550 " reachable on the network" %
2551 new_ip, errors.ECODE_NOTUNIQUE)
2553 self.op.name = new_name
2555 def Exec(self, feedback_fn):
2556 """Rename the cluster.
2559 clustername = self.op.name
2562 # shutdown the master IP
2563 master = self.cfg.GetMasterNode()
2564 result = self.rpc.call_node_stop_master(master, False)
2565 result.Raise("Could not disable the master role")
2568 cluster = self.cfg.GetClusterInfo()
2569 cluster.cluster_name = clustername
2570 cluster.master_ip = ip
2571 self.cfg.Update(cluster, feedback_fn)
2573 # update the known hosts file
2574 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2575 node_list = self.cfg.GetNodeList()
2577 node_list.remove(master)
2580 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2582 result = self.rpc.call_node_start_master(master, False, False)
2583 msg = result.fail_msg
2585 self.LogWarning("Could not re-enable the master role on"
2586 " the master, please restart manually: %s", msg)
2591 class LUSetClusterParams(LogicalUnit):
2592 """Change the parameters of the cluster.
2595 HPATH = "cluster-modify"
2596 HTYPE = constants.HTYPE_CLUSTER
2598 ("vg_name", None, ht.TMaybeString),
2599 ("enabled_hypervisors", None,
2600 ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2602 ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2604 ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2605 ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2607 ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2609 ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2610 ("uid_pool", None, ht.NoType),
2611 ("add_uids", None, ht.NoType),
2612 ("remove_uids", None, ht.NoType),
2613 ("maintain_node_health", None, ht.TMaybeBool),
2614 ("prealloc_wipe_disks", None, ht.TMaybeBool),
2615 ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2616 ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2617 ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2618 ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2619 ("hidden_os", None, ht.TOr(ht.TListOf(\
2622 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2624 ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2627 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2632 def CheckArguments(self):
2636 if self.op.uid_pool:
2637 uidpool.CheckUidPool(self.op.uid_pool)
2639 if self.op.add_uids:
2640 uidpool.CheckUidPool(self.op.add_uids)
2642 if self.op.remove_uids:
2643 uidpool.CheckUidPool(self.op.remove_uids)
2645 def ExpandNames(self):
2646 # FIXME: in the future maybe other cluster params won't require checking on
2647 # all nodes to be modified.
2648 self.needed_locks = {
2649 locking.LEVEL_NODE: locking.ALL_SET,
2651 self.share_locks[locking.LEVEL_NODE] = 1
2653 def BuildHooksEnv(self):
2658 "OP_TARGET": self.cfg.GetClusterName(),
2659 "NEW_VG_NAME": self.op.vg_name,
2661 mn = self.cfg.GetMasterNode()
2662 return env, [mn], [mn]
2664 def CheckPrereq(self):
2665 """Check prerequisites.
2667 This checks whether the given parameters don't conflict and
2668 whether the given volume group is valid.
2671 if self.op.vg_name is not None and not self.op.vg_name:
2672 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2673 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2674 " instances exist", errors.ECODE_INVAL)
2676 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2677 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2678 raise errors.OpPrereqError("Cannot disable drbd helper while"
2679 " drbd-based instances exist",
2682 node_list = self.acquired_locks[locking.LEVEL_NODE]
2684 # if vg_name is not None, check the given volume group on all nodes
2686 vglist = self.rpc.call_vg_list(node_list)
2687 for node in node_list:
2688 msg = vglist[node].fail_msg
2690 # ignoring down node
2691 self.LogWarning("Error while gathering data on node %s"
2692 " (ignoring node): %s", node, msg)
2694 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2696 constants.MIN_VG_SIZE)
2698 raise errors.OpPrereqError("Error on node '%s': %s" %
2699 (node, vgstatus), errors.ECODE_ENVIRON)
2701 if self.op.drbd_helper:
2702 # checks given drbd helper on all nodes
2703 helpers = self.rpc.call_drbd_helper(node_list)
2704 for node in node_list:
2705 ninfo = self.cfg.GetNodeInfo(node)
2707 self.LogInfo("Not checking drbd helper on offline node %s", node)
2709 msg = helpers[node].fail_msg
2711 raise errors.OpPrereqError("Error checking drbd helper on node"
2712 " '%s': %s" % (node, msg),
2713 errors.ECODE_ENVIRON)
2714 node_helper = helpers[node].payload
2715 if node_helper != self.op.drbd_helper:
2716 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2717 (node, node_helper), errors.ECODE_ENVIRON)
2719 self.cluster = cluster = self.cfg.GetClusterInfo()
2720 # validate params changes
2721 if self.op.beparams:
2722 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2723 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2725 if self.op.nicparams:
2726 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2727 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2728 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2731 # check all instances for consistency
2732 for instance in self.cfg.GetAllInstancesInfo().values():
2733 for nic_idx, nic in enumerate(instance.nics):
2734 params_copy = copy.deepcopy(nic.nicparams)
2735 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2737 # check parameter syntax
2739 objects.NIC.CheckParameterSyntax(params_filled)
2740 except errors.ConfigurationError, err:
2741 nic_errors.append("Instance %s, nic/%d: %s" %
2742 (instance.name, nic_idx, err))
2744 # if we're moving instances to routed, check that they have an ip
2745 target_mode = params_filled[constants.NIC_MODE]
2746 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2747 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2748 (instance.name, nic_idx))
2750 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2751 "\n".join(nic_errors))
2753 # hypervisor list/parameters
2754 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2755 if self.op.hvparams:
2756 for hv_name, hv_dict in self.op.hvparams.items():
2757 if hv_name not in self.new_hvparams:
2758 self.new_hvparams[hv_name] = hv_dict
2760 self.new_hvparams[hv_name].update(hv_dict)
2762 # os hypervisor parameters
2763 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2765 for os_name, hvs in self.op.os_hvp.items():
2766 if os_name not in self.new_os_hvp:
2767 self.new_os_hvp[os_name] = hvs
2769 for hv_name, hv_dict in hvs.items():
2770 if hv_name not in self.new_os_hvp[os_name]:
2771 self.new_os_hvp[os_name][hv_name] = hv_dict
2773 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2776 self.new_osp = objects.FillDict(cluster.osparams, {})
2777 if self.op.osparams:
2778 for os_name, osp in self.op.osparams.items():
2779 if os_name not in self.new_osp:
2780 self.new_osp[os_name] = {}
2782 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2785 if not self.new_osp[os_name]:
2786 # we removed all parameters
2787 del self.new_osp[os_name]
2789 # check the parameter validity (remote check)
2790 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2791 os_name, self.new_osp[os_name])
2793 # changes to the hypervisor list
2794 if self.op.enabled_hypervisors is not None:
2795 self.hv_list = self.op.enabled_hypervisors
2796 for hv in self.hv_list:
2797 # if the hypervisor doesn't already exist in the cluster
2798 # hvparams, we initialize it to empty, and then (in both
2799 # cases) we make sure to fill the defaults, as we might not
2800 # have a complete defaults list if the hypervisor wasn't
2802 if hv not in new_hvp:
2804 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2805 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2807 self.hv_list = cluster.enabled_hypervisors
2809 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2810 # either the enabled list has changed, or the parameters have, validate
2811 for hv_name, hv_params in self.new_hvparams.items():
2812 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2813 (self.op.enabled_hypervisors and
2814 hv_name in self.op.enabled_hypervisors)):
2815 # either this is a new hypervisor, or its parameters have changed
2816 hv_class = hypervisor.GetHypervisor(hv_name)
2817 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2818 hv_class.CheckParameterSyntax(hv_params)
2819 _CheckHVParams(self, node_list, hv_name, hv_params)
2822 # no need to check any newly-enabled hypervisors, since the
2823 # defaults have already been checked in the above code-block
2824 for os_name, os_hvp in self.new_os_hvp.items():
2825 for hv_name, hv_params in os_hvp.items():
2826 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827 # we need to fill in the new os_hvp on top of the actual hv_p
2828 cluster_defaults = self.new_hvparams.get(hv_name, {})
2829 new_osp = objects.FillDict(cluster_defaults, hv_params)
2830 hv_class = hypervisor.GetHypervisor(hv_name)
2831 hv_class.CheckParameterSyntax(new_osp)
2832 _CheckHVParams(self, node_list, hv_name, new_osp)
2834 if self.op.default_iallocator:
2835 alloc_script = utils.FindFile(self.op.default_iallocator,
2836 constants.IALLOCATOR_SEARCH_PATH,
2838 if alloc_script is None:
2839 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2840 " specified" % self.op.default_iallocator,
2843 def Exec(self, feedback_fn):
2844 """Change the parameters of the cluster.
2847 if self.op.vg_name is not None:
2848 new_volume = self.op.vg_name
2851 if new_volume != self.cfg.GetVGName():
2852 self.cfg.SetVGName(new_volume)
2854 feedback_fn("Cluster LVM configuration already in desired"
2855 " state, not changing")
2856 if self.op.drbd_helper is not None:
2857 new_helper = self.op.drbd_helper
2860 if new_helper != self.cfg.GetDRBDHelper():
2861 self.cfg.SetDRBDHelper(new_helper)
2863 feedback_fn("Cluster DRBD helper already in desired state,"
2865 if self.op.hvparams:
2866 self.cluster.hvparams = self.new_hvparams
2868 self.cluster.os_hvp = self.new_os_hvp
2869 if self.op.enabled_hypervisors is not None:
2870 self.cluster.hvparams = self.new_hvparams
2871 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2872 if self.op.beparams:
2873 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2874 if self.op.nicparams:
2875 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2876 if self.op.osparams:
2877 self.cluster.osparams = self.new_osp
2879 if self.op.candidate_pool_size is not None:
2880 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2881 # we need to update the pool size here, otherwise the save will fail
2882 _AdjustCandidatePool(self, [])
2884 if self.op.maintain_node_health is not None:
2885 self.cluster.maintain_node_health = self.op.maintain_node_health
2887 if self.op.prealloc_wipe_disks is not None:
2888 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2890 if self.op.add_uids is not None:
2891 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2893 if self.op.remove_uids is not None:
2894 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2896 if self.op.uid_pool is not None:
2897 self.cluster.uid_pool = self.op.uid_pool
2899 if self.op.default_iallocator is not None:
2900 self.cluster.default_iallocator = self.op.default_iallocator
2902 if self.op.reserved_lvs is not None:
2903 self.cluster.reserved_lvs = self.op.reserved_lvs
2905 def helper_os(aname, mods, desc):
2907 lst = getattr(self.cluster, aname)
2908 for key, val in mods:
2909 if key == constants.DDM_ADD:
2911 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2914 elif key == constants.DDM_REMOVE:
2918 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2920 raise errors.ProgrammerError("Invalid modification '%s'" % key)
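# helper_os expects mods as a list of (modification, os_name) pairs; an
# illustrative (hypothetical) value, matching the _OP_PARAMS check above:
#   [(constants.DDM_ADD, "lenny-image"), (constants.DDM_REMOVE, "etch-image")]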
2922 if self.op.hidden_os:
2923 helper_os("hidden_os", self.op.hidden_os, "hidden")
2925 if self.op.blacklisted_os:
2926 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2928 self.cfg.Update(self.cluster, feedback_fn)
2931 def _UploadHelper(lu, nodes, fname):
2932 """Helper for uploading a file and showing warnings.
2935 if os.path.exists(fname):
2936 result = lu.rpc.call_upload_file(nodes, fname)
2937 for to_node, to_result in result.items():
2938 msg = to_result.fail_msg
2940 msg = ("Copy of file %s to node %s failed: %s" %
2941 (fname, to_node, msg))
2942 lu.proc.LogWarning(msg)
2945 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
2946 """Distribute additional files which are part of the cluster configuration.
2948 ConfigWriter takes care of distributing the config and ssconf files, but
2949 there are more files which should be distributed to all nodes. This function
2950 makes sure those are copied.
2952 @param lu: calling logical unit
2953 @param additional_nodes: list of nodes not in the config to distribute to
2954 @type additional_vm: boolean
2955 @param additional_vm: whether the additional nodes are vm-capable or not
2958 # 1. Gather target nodes
2959 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2960 dist_nodes = lu.cfg.GetOnlineNodeList()
2961 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
2962 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
2963 if additional_nodes is not None:
2964 dist_nodes.extend(additional_nodes)
2966 vm_nodes.extend(additional_nodes)
2967 if myself.name in dist_nodes:
2968 dist_nodes.remove(myself.name)
2969 if myself.name in vm_nodes:
2970 vm_nodes.remove(myself.name)
2972 # 2. Gather files to distribute
2973 dist_files = set([constants.ETC_HOSTS,
2974 constants.SSH_KNOWN_HOSTS_FILE,
2975 constants.RAPI_CERT_FILE,
2976 constants.RAPI_USERS_FILE,
2977 constants.CONFD_HMAC_KEY,
2978 constants.CLUSTER_DOMAIN_SECRET_FILE,
2982 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2983 for hv_name in enabled_hypervisors:
2984 hv_class = hypervisor.GetHypervisor(hv_name)
2985 vm_files.update(hv_class.GetAncillaryFiles())
2987 # 3. Perform the files upload
2988 for fname in dist_files:
2989 _UploadHelper(lu, dist_nodes, fname)
2990 for fname in vm_files:
2991 _UploadHelper(lu, vm_nodes, fname)
2994 class LURedistributeConfig(NoHooksLU):
2995 """Force the redistribution of cluster configuration.
2997 This is a very simple LU.
3002 def ExpandNames(self):
3003 self.needed_locks = {
3004 locking.LEVEL_NODE: locking.ALL_SET,
3006 self.share_locks[locking.LEVEL_NODE] = 1
3008 def Exec(self, feedback_fn):
3009 """Redistribute the configuration.
3012 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3013 _RedistributeAncillaryFiles(self)
3016 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3017 """Sleep and poll for an instance's disk to sync.
3020 if not instance.disks or (disks is not None and not disks):
3023 disks = _ExpandCheckDisks(instance, disks)
3026 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3028 node = instance.primary_node
3031 lu.cfg.SetDiskID(dev, node)
3033 # TODO: Convert to utils.Retry
3036 degr_retries = 10 # in seconds, as we sleep 1 second each time
3040 cumul_degraded = False
3041 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3042 msg = rstats.fail_msg
3044 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3047 raise errors.RemoteError("Can't contact node %s for mirror data,"
3048 " aborting." % node)
3051 rstats = rstats.payload
3053 for i, mstat in enumerate(rstats):
3055 lu.LogWarning("Can't compute data for node %s/%s",
3056 node, disks[i].iv_name)
3059 cumul_degraded = (cumul_degraded or
3060 (mstat.is_degraded and mstat.sync_percent is None))
3061 if mstat.sync_percent is not None:
3063 if mstat.estimated_time is not None:
3064 rem_time = ("%s remaining (estimated)" %
3065 utils.FormatSeconds(mstat.estimated_time))
3066 max_time = mstat.estimated_time
3068 rem_time = "no time estimate"
3069 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3070 (disks[i].iv_name, mstat.sync_percent, rem_time))
3072 # if we're done but degraded, let's do a few small retries, to
3073 # make sure we see a stable and not transient situation; therefore
3074 # we force a restart of the loop
3075 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3076 logging.info("Degraded disks found, %d retries left", degr_retries)
3084 time.sleep(min(60, max_time))
3087 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3088 return not cumul_degraded
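# illustrative use, assuming an lu and instance in scope:
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("Some disks are still degraded")
# i.e. the return value is True only when no disk remained degraded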
3091 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3092 """Check that mirrors are not degraded.
3094 The ldisk parameter, if True, will change the test from the
3095 is_degraded attribute (which represents overall non-ok status for
3096 the device(s)) to the ldisk (representing the local storage status).
3099 lu.cfg.SetDiskID(dev, node)
3103 if on_primary or dev.AssembleOnSecondary():
3104 rstats = lu.rpc.call_blockdev_find(node, dev)
3105 msg = rstats.fail_msg
3107 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3109 elif not rstats.payload:
3110 lu.LogWarning("Can't find disk on node %s", node)
3114 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3116 result = result and not rstats.payload.is_degraded
3119 for child in dev.children:
3120 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3125 class LUDiagnoseOS(NoHooksLU):
3126 """Logical unit for OS diagnose/query.
3131 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3135 _BLK = "blacklisted"
3137 _FIELDS_STATIC = utils.FieldSet()
3138 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3139 "parameters", "api_versions", _HID, _BLK)
3141 def CheckArguments(self):
3143 raise errors.OpPrereqError("Selective OS query not supported",
3146 _CheckOutputFields(static=self._FIELDS_STATIC,
3147 dynamic=self._FIELDS_DYNAMIC,
3148 selected=self.op.output_fields)
3150 def ExpandNames(self):
3151 # Lock all nodes, in shared mode
3152 # Temporary removal of locks, should be reverted later
3153 # TODO: reintroduce locks when they are lighter-weight
3154 self.needed_locks = {}
3155 #self.share_locks[locking.LEVEL_NODE] = 1
3156 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3159 def _DiagnoseByOS(rlist):
3160 """Remaps a per-node return list into an a per-os per-node dictionary
3162 @param rlist: a map with node names as keys and OS objects as values
3165 @return: a dictionary with osnames as keys and as value another
3166 map, with nodes as keys and tuples of (path, status, diagnose,
3167 variants, parameters, api_versions) as values, eg::
3169 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3170 (/srv/..., False, "invalid api")],
3171 "node2": [(/srv/..., True, "", [], [])]}
3176 # we build here the list of nodes that didn't fail the RPC (at RPC
3177 # level), so that nodes with a non-responding node daemon don't
3178 # make all OSes invalid
3179 good_nodes = [node_name for node_name in rlist
3180 if not rlist[node_name].fail_msg]
3181 for node_name, nr in rlist.items():
3182 if nr.fail_msg or not nr.payload:
3184 for (name, path, status, diagnose, variants,
3185 params, api_versions) in nr.payload:
3186 if name not in all_os:
3187 # build a list of nodes for this os containing empty lists
3188 # for each node in node_list
3190 for nname in good_nodes:
3191 all_os[name][nname] = []
3192 # convert params from [name, help] to (name, help)
3193 params = [tuple(v) for v in params]
3194 all_os[name][node_name].append((path, status, diagnose,
3195 variants, params, api_versions))
3198 def Exec(self, feedback_fn):
3199 """Compute the list of OSes.
3202 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3203 node_data = self.rpc.call_os_diagnose(valid_nodes)
3204 pol = self._DiagnoseByOS(node_data)
3206 cluster = self.cfg.GetClusterInfo()
3208 for os_name in utils.NiceSort(pol.keys()):
3209 os_data = pol[os_name]
3212 (variants, params, api_versions) = null_state = (set(), set(), set())
3213 for idx, osl in enumerate(os_data.values()):
3214 valid = bool(valid and osl and osl[0][1])
3216 (variants, params, api_versions) = null_state
3218 node_variants, node_params, node_api = osl[0][3:6]
3219 if idx == 0: # first entry
3220 variants = set(node_variants)
3221 params = set(node_params)
3222 api_versions = set(node_api)
3223 else: # keep consistency
3224 variants.intersection_update(node_variants)
3225 params.intersection_update(node_params)
3226 api_versions.intersection_update(node_api)
3228 is_hid = os_name in cluster.hidden_os
3229 is_blk = os_name in cluster.blacklisted_os
3230 if ((self._HID not in self.op.output_fields and is_hid) or
3231 (self._BLK not in self.op.output_fields and is_blk) or
3232 (self._VLD not in self.op.output_fields and not valid)):
3235 for field in self.op.output_fields:
3238 elif field == self._VLD:
3240 elif field == "node_status":
3241 # this is just a copy of the dict
3243 for node_name, nos_list in os_data.items():
3244 val[node_name] = nos_list
3245 elif field == "variants":
3246 val = utils.NiceSort(list(variants))
3247 elif field == "parameters":
3249 elif field == "api_versions":
3250 val = list(api_versions)
3251 elif field == self._HID:
3253 elif field == self._BLK:
3256 raise errors.ParameterError(field)
3263 class LURemoveNode(LogicalUnit):
3264 """Logical unit for removing a node.
3267 HPATH = "node-remove"
3268 HTYPE = constants.HTYPE_NODE
3273 def BuildHooksEnv(self):
3276 This doesn't run on the target node in the pre phase as a failed
3277 node would then be impossible to remove.
3281 "OP_TARGET": self.op.node_name,
3282 "NODE_NAME": self.op.node_name,
3284 all_nodes = self.cfg.GetNodeList()
3286 all_nodes.remove(self.op.node_name)
3288 logging.warning("Node %s which is about to be removed was not found"
3289 " in the list of all nodes", self.op.node_name)
3290 return env, all_nodes, all_nodes
3292 def CheckPrereq(self):
3293 """Check prerequisites.
3296 - the node exists in the configuration
3297 - it does not have primary or secondary instances
3298 - it's not the master
3300 Any errors are signaled by raising errors.OpPrereqError.
3303 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3304 node = self.cfg.GetNodeInfo(self.op.node_name)
3305 assert node is not None
3307 instance_list = self.cfg.GetInstanceList()
3309 masternode = self.cfg.GetMasterNode()
3310 if node.name == masternode:
3311 raise errors.OpPrereqError("Node is the master node,"
3312 " you need to failover first.",
3315 for instance_name in instance_list:
3316 instance = self.cfg.GetInstanceInfo(instance_name)
3317 if node.name in instance.all_nodes:
3318 raise errors.OpPrereqError("Instance %s is still running on the node,"
3319 " please remove first." % instance_name,
3321 self.op.node_name = node.name
3324 def Exec(self, feedback_fn):
3325 """Removes the node from the cluster.
3329 logging.info("Stopping the node daemon and removing configs from node %s",
3332 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3334 # Promote nodes to master candidate as needed
3335 _AdjustCandidatePool(self, exceptions=[node.name])
3336 self.context.RemoveNode(node.name)
3338 # Run post hooks on the node before it's removed
3339 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3341 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3343 # pylint: disable-msg=W0702
3344 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3346 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3347 msg = result.fail_msg
3349 self.LogWarning("Errors encountered on the remote node while leaving"
3350 " the cluster: %s", msg)
3352 # Remove node from our /etc/hosts
3353 if self.cfg.GetClusterInfo().modify_etc_hosts:
3354 master_node = self.cfg.GetMasterNode()
3355 result = self.rpc.call_etc_hosts_modify(master_node,
3356 constants.ETC_HOSTS_REMOVE,
3358 result.Raise("Can't update hosts file with new host data")
3359 _RedistributeAncillaryFiles(self)
3362 class LUQueryNodes(NoHooksLU):
3363 """Logical unit for querying nodes.
3366 # pylint: disable-msg=W0142
3369 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3370 ("use_locking", False, ht.TBool),
3374 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3375 "master_candidate", "offline", "drained",
3376 "master_capable", "vm_capable"]
3378 _FIELDS_DYNAMIC = utils.FieldSet(
3380 "mtotal", "mnode", "mfree",
3382 "ctotal", "cnodes", "csockets",
3385 _FIELDS_STATIC = utils.FieldSet(*[
3386 "pinst_cnt", "sinst_cnt",
3387 "pinst_list", "sinst_list",
3388 "pip", "sip", "tags",
3390 "role"] + _SIMPLE_FIELDS
3393 def CheckArguments(self):
3394 _CheckOutputFields(static=self._FIELDS_STATIC,
3395 dynamic=self._FIELDS_DYNAMIC,
3396 selected=self.op.output_fields)
3398 def ExpandNames(self):
3399 self.needed_locks = {}
3400 self.share_locks[locking.LEVEL_NODE] = 1
3403 self.wanted = _GetWantedNodes(self, self.op.names)
3405 self.wanted = locking.ALL_SET
3407 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3408 self.do_locking = self.do_node_query and self.op.use_locking
3410 # if we don't request only static fields, we need to lock the nodes
3411 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3413 def Exec(self, feedback_fn):
3414 """Computes the list of nodes and their attributes.
3417 all_info = self.cfg.GetAllNodesInfo()
3419 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3420 elif self.wanted != locking.ALL_SET:
3421 nodenames = self.wanted
3422 missing = set(nodenames).difference(all_info.keys())
3424 raise errors.OpExecError(
3425 "Some nodes were removed before retrieving their data: %s" % missing)
3427 nodenames = all_info.keys()
3429 nodenames = utils.NiceSort(nodenames)
3430 nodelist = [all_info[name] for name in nodenames]
3432 # begin data gathering
3434 if self.do_node_query:
3436 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3437 self.cfg.GetHypervisorType())
3438 for name in nodenames:
3439 nodeinfo = node_data[name]
3440 if not nodeinfo.fail_msg and nodeinfo.payload:
3441 nodeinfo = nodeinfo.payload
3442 fn = utils.TryConvert
3444 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3445 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3446 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3447 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3448 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3449 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3450 "bootid": nodeinfo.get('bootid', None),
3451 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3452 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3455 live_data[name] = {}
3457 live_data = dict((name, {}) for name in nodenames) # no shared dict value
3459 node_to_primary = dict([(name, set()) for name in nodenames])
3460 node_to_secondary = dict([(name, set()) for name in nodenames])
3462 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3463 "sinst_cnt", "sinst_list"))
3464 if inst_fields & frozenset(self.op.output_fields):
3465 inst_data = self.cfg.GetAllInstancesInfo()
3467 for inst in inst_data.values():
3468 if inst.primary_node in node_to_primary:
3469 node_to_primary[inst.primary_node].add(inst.name)
3470 for secnode in inst.secondary_nodes:
3471 if secnode in node_to_secondary:
3472 node_to_secondary[secnode].add(inst.name)
3474 master_node = self.cfg.GetMasterNode()
3476 # end data gathering
3479 for node in nodelist:
3481 for field in self.op.output_fields:
3482 if field in self._SIMPLE_FIELDS:
3483 val = getattr(node, field)
3484 elif field == "pinst_list":
3485 val = list(node_to_primary[node.name])
3486 elif field == "sinst_list":
3487 val = list(node_to_secondary[node.name])
3488 elif field == "pinst_cnt":
3489 val = len(node_to_primary[node.name])
3490 elif field == "sinst_cnt":
3491 val = len(node_to_secondary[node.name])
3492 elif field == "pip":
3493 val = node.primary_ip
3494 elif field == "sip":
3495 val = node.secondary_ip
3496 elif field == "tags":
3497 val = list(node.GetTags())
3498 elif field == "master":
3499 val = node.name == master_node
3500 elif self._FIELDS_DYNAMIC.Matches(field):
3501 val = live_data[node.name].get(field, None)
3502 elif field == "role":
3503 if node.name == master_node:
3505 elif node.master_candidate:
3514 raise errors.ParameterError(field)
3515 node_output.append(val)
3516 output.append(node_output)
3521 class LUQueryNodeVolumes(NoHooksLU):
3522 """Logical unit for getting volumes on node(s).
3526 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3527 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3530 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3531 _FIELDS_STATIC = utils.FieldSet("node")
3533 def CheckArguments(self):
3534 _CheckOutputFields(static=self._FIELDS_STATIC,
3535 dynamic=self._FIELDS_DYNAMIC,
3536 selected=self.op.output_fields)
3538 def ExpandNames(self):
3539 self.needed_locks = {}
3540 self.share_locks[locking.LEVEL_NODE] = 1
3541 if not self.op.nodes:
3542 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3544 self.needed_locks[locking.LEVEL_NODE] = \
3545 _GetWantedNodes(self, self.op.nodes)
3547 def Exec(self, feedback_fn):
3548 """Computes the list of nodes and their attributes.
3551 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3552 volumes = self.rpc.call_node_volumes(nodenames)
3554 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3555 in self.cfg.GetInstanceList()]
3557 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
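# lv_by_node maps each instance object to {node_name: [lv_name, ...]}
# and is used below to attribute each reported volume to its instance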
3560 for node in nodenames:
3561 nresult = volumes[node]
3564 msg = nresult.fail_msg
3566 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3569 node_vols = nresult.payload[:]
3570 node_vols.sort(key=lambda vol: vol['dev'])
3572 for vol in node_vols:
3574 for field in self.op.output_fields:
3577 elif field == "phys":
3581 elif field == "name":
3583 elif field == "size":
3584 val = int(float(vol['size']))
3585 elif field == "instance":
3587 if node not in lv_by_node[inst]:
3589 if vol['name'] in lv_by_node[inst][node]:
3595 raise errors.ParameterError(field)
3596 node_output.append(str(val))
3598 output.append(node_output)
3603 class LUQueryNodeStorage(NoHooksLU):
3604 """Logical unit for getting information on storage units on node(s).
3607 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3609 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3610 ("storage_type", ht.NoDefault, _CheckStorageType),
3611 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3612 ("name", None, ht.TMaybeString),
3616 def CheckArguments(self):
3617 _CheckOutputFields(static=self._FIELDS_STATIC,
3618 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3619 selected=self.op.output_fields)
3621 def ExpandNames(self):
3622 self.needed_locks = {}
3623 self.share_locks[locking.LEVEL_NODE] = 1
3626 self.needed_locks[locking.LEVEL_NODE] = \
3627 _GetWantedNodes(self, self.op.nodes)
3629 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3631 def Exec(self, feedback_fn):
3632 """Computes the list of nodes and their attributes.
3635 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3637 # Always get name to sort by
3638 if constants.SF_NAME in self.op.output_fields:
3639 fields = self.op.output_fields[:]
3641 fields = [constants.SF_NAME] + self.op.output_fields
3643 # Never ask for node or type as it's only known to the LU
3644 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3645 while extra in fields:
3646 fields.remove(extra)
3648 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3649 name_idx = field_idx[constants.SF_NAME]
3651 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3652 data = self.rpc.call_storage_list(self.nodes,
3653 self.op.storage_type, st_args,
3654 self.op.name, fields)
3658 for node in utils.NiceSort(self.nodes):
3659 nresult = data[node]
3663 msg = nresult.fail_msg
3665 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3668 rows = dict([(row[name_idx], row) for row in nresult.payload])
3670 for name in utils.NiceSort(rows.keys()):
3675 for field in self.op.output_fields:
3676 if field == constants.SF_NODE:
3678 elif field == constants.SF_TYPE:
3679 val = self.op.storage_type
3680 elif field in field_idx:
3681 val = row[field_idx[field]]
3683 raise errors.ParameterError(field)
3692 class LUModifyNodeStorage(NoHooksLU):
3693 """Logical unit for modifying a storage volume on a node.
3698 ("storage_type", ht.NoDefault, _CheckStorageType),
3699 ("name", ht.NoDefault, ht.TNonEmptyString),
3700 ("changes", ht.NoDefault, ht.TDict),
3704 def CheckArguments(self):
3705 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3707 storage_type = self.op.storage_type
3710 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3712 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3713 " modified" % storage_type,
3716 diff = set(self.op.changes.keys()) - modifiable
3718 raise errors.OpPrereqError("The following fields can not be modified for"
3719 " storage units of type '%s': %r" %
3720 (storage_type, list(diff)),
3723 def ExpandNames(self):
3724 self.needed_locks = {
3725 locking.LEVEL_NODE: self.op.node_name,
3728 def Exec(self, feedback_fn):
3729 """Computes the list of nodes and their attributes.
3732 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3733 result = self.rpc.call_storage_modify(self.op.node_name,
3734 self.op.storage_type, st_args,
3735 self.op.name, self.op.changes)
3736 result.Raise("Failed to modify storage unit '%s' on %s" %
3737 (self.op.name, self.op.node_name))
3740 class LUAddNode(LogicalUnit):
3741 """Logical unit for adding node to the cluster.
3745 HTYPE = constants.HTYPE_NODE
3748 ("primary_ip", None, ht.NoType),
3749 ("secondary_ip", None, ht.TMaybeString),
3750 ("readd", False, ht.TBool),
3751 ("group", None, ht.TMaybeString),
3752 ("master_capable", None, ht.TMaybeBool),
3753 ("vm_capable", None, ht.TMaybeBool),
3755 _NFLAGS = ["master_capable", "vm_capable"]
3757 def CheckArguments(self):
3758 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3759 # validate/normalize the node name
3760 self.hostname = netutils.GetHostname(name=self.op.node_name,
3761 family=self.primary_ip_family)
3762 self.op.node_name = self.hostname.name
3763 if self.op.readd and self.op.group:
3764 raise errors.OpPrereqError("Cannot pass a node group when a node is"
3765 " being readded", errors.ECODE_INVAL)
3767 def BuildHooksEnv(self):
3770 This will run on all nodes before, and on all nodes + the new node after.
3774 "OP_TARGET": self.op.node_name,
3775 "NODE_NAME": self.op.node_name,
3776 "NODE_PIP": self.op.primary_ip,
3777 "NODE_SIP": self.op.secondary_ip,
3778 "MASTER_CAPABLE": str(self.op.master_capable),
3779 "VM_CAPABLE": str(self.op.vm_capable),
3781 nodes_0 = self.cfg.GetNodeList()
3782 nodes_1 = nodes_0 + [self.op.node_name, ]
3783 return env, nodes_0, nodes_1
3785 def CheckPrereq(self):
3786 """Check prerequisites.
3789 - the new node is not already in the config
3791 - its parameters (single/dual homed) matches the cluster
3793 Any errors are signaled by raising errors.OpPrereqError.
3797 hostname = self.hostname
3798 node = hostname.name
3799 primary_ip = self.op.primary_ip = hostname.ip
3800 if self.op.secondary_ip is None:
3801 if self.primary_ip_family == netutils.IP6Address.family:
3802 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3803 " IPv4 address must be given as secondary",
3805 self.op.secondary_ip = primary_ip
3807 secondary_ip = self.op.secondary_ip
3808 if not netutils.IP4Address.IsValid(secondary_ip):
3809 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3810 " address" % secondary_ip, errors.ECODE_INVAL)
3812 node_list = cfg.GetNodeList()
3813 if not self.op.readd and node in node_list:
3814 raise errors.OpPrereqError("Node %s is already in the configuration" %
3815 node, errors.ECODE_EXISTS)
3816 elif self.op.readd and node not in node_list:
3817 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3820 self.changed_primary_ip = False
3822 for existing_node_name in node_list:
3823 existing_node = cfg.GetNodeInfo(existing_node_name)
3825 if self.op.readd and node == existing_node_name:
3826         if existing_node.secondary_ip != secondary_ip:
3827           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3828                                      " address configuration as before",
3829                                      errors.ECODE_INVAL)
3830         if existing_node.primary_ip != primary_ip:
3831           self.changed_primary_ip = True
3833         continue
3835 if (existing_node.primary_ip == primary_ip or
3836 existing_node.secondary_ip == primary_ip or
3837 existing_node.primary_ip == secondary_ip or
3838 existing_node.secondary_ip == secondary_ip):
3839 raise errors.OpPrereqError("New node ip address(es) conflict with"
3840 " existing node %s" % existing_node.name,
3841 errors.ECODE_NOTUNIQUE)
3843     # After this 'if' block, None is no longer a valid value for the
3844     # _capable op attributes
3845     if self.op.readd:
3846       old_node = self.cfg.GetNodeInfo(node)
3847       assert old_node is not None, "Can't retrieve locked node %s" % node
3848       for attr in self._NFLAGS:
3849         if getattr(self.op, attr) is None:
3850           setattr(self.op, attr, getattr(old_node, attr))
3851     else:
3852       for attr in self._NFLAGS:
3853         if getattr(self.op, attr) is None:
3854           setattr(self.op, attr, True)
3856     if self.op.readd and not self.op.vm_capable:
3857       pri, sec = cfg.GetNodeInstances(node)
3858       if pri or sec:
3859         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
3860                                    " flag set to false, but it already holds"
3861                                    " instances" % node,
3862                                    errors.ECODE_INVAL)
3864 # check that the type of the node (single versus dual homed) is the
3865 # same as for the master
3866 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3867 master_singlehomed = myself.secondary_ip == myself.primary_ip
3868 newbie_singlehomed = secondary_ip == primary_ip
3869     if master_singlehomed != newbie_singlehomed:
3870       if master_singlehomed:
3871         raise errors.OpPrereqError("The master has no secondary ip but the"
3872                                    " new node has one",
3873                                    errors.ECODE_INVAL)
3874       else:
3875         raise errors.OpPrereqError("The master has a secondary ip but the"
3876                                    " new node doesn't have one",
3877                                    errors.ECODE_INVAL)
3879 # checks reachability
3880 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3881 raise errors.OpPrereqError("Node not reachable by ping",
3882 errors.ECODE_ENVIRON)
3884 if not newbie_singlehomed:
3885 # check reachability from my secondary ip to newbie's secondary ip
3886 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3887 source=myself.secondary_ip):
3888 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3889 " based ping to node daemon port",
3890 errors.ECODE_ENVIRON)
3892     if self.op.readd:
3893       exceptions = [node]
3894     else:
3895       exceptions = []
3897     if self.op.master_capable:
3898       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3899     else:
3900       self.master_candidate = False
3902     if self.op.readd:
3903       self.new_node = old_node
3904     else:
3905       node_group = cfg.LookupNodeGroup(self.op.group)
3906       self.new_node = objects.Node(name=node,
3907                                    primary_ip=primary_ip,
3908                                    secondary_ip=secondary_ip,
3909                                    master_candidate=self.master_candidate,
3910                                    offline=False, drained=False,
3911                                    group=node_group)
3913 def Exec(self, feedback_fn):
3914 """Adds the new node to the cluster.
3917 new_node = self.new_node
3918 node = new_node.name
3920     # for re-adds, reset the offline/drained/master-candidate flags;
3921     # we need to reset here, otherwise offline would prevent RPC calls
3922     # later in the procedure; this also means that if the re-add
3923     # fails, we are left with a non-offlined, broken node
3924     if self.op.readd:
3925       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3926       self.LogInfo("Readding a node, the offline/drained flags were reset")
3927       # if we demote the node, we do cleanup later in the procedure
3928       new_node.master_candidate = self.master_candidate
3929       if self.changed_primary_ip:
3930         new_node.primary_ip = self.op.primary_ip
3932 # copy the master/vm_capable flags
3933 for attr in self._NFLAGS:
3934 setattr(new_node, attr, getattr(self.op, attr))
3936 # notify the user about any possible mc promotion
3937 if new_node.master_candidate:
3938 self.LogInfo("Node will be a master candidate")
3940 # check connectivity
3941 result = self.rpc.call_version([node])[node]
3942 result.Raise("Can't get version information from node %s" % node)
3943     if constants.PROTOCOL_VERSION == result.payload:
3944       logging.info("Communication to node %s fine, sw version %s match",
3945                    node, result.payload)
3946     else:
3947       raise errors.OpExecError("Version mismatch master version %s,"
3948                                " node version %s" %
3949                                (constants.PROTOCOL_VERSION, result.payload))
3951 # Add node to our /etc/hosts, and add key to known_hosts
3952 if self.cfg.GetClusterInfo().modify_etc_hosts:
3953 master_node = self.cfg.GetMasterNode()
3954       result = self.rpc.call_etc_hosts_modify(master_node,
3955                                               constants.ETC_HOSTS_ADD,
3956                                               self.hostname.name,
3957                                               self.hostname.ip)
3958       result.Raise("Can't update hosts file with new host data")
3960 if new_node.secondary_ip != new_node.primary_ip:
3961       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
3962                                False)
3964 node_verify_list = [self.cfg.GetMasterNode()]
3965 node_verify_param = {
3966 constants.NV_NODELIST: [node],
3967       # TODO: do a node-net-test as well?
3968       }
3970 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3971 self.cfg.GetClusterName())
3972 for verifier in node_verify_list:
3973 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3974       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3975       if nl_payload:
3976         for failed in nl_payload:
3977           feedback_fn("ssh/hostname verification failed"
3978                       " (checking from %s): %s" %
3979                       (verifier, nl_payload[failed]))
3980         raise errors.OpExecError("ssh/hostname verification failed.")
3982     if self.op.readd:
3983       _RedistributeAncillaryFiles(self)
3984       self.context.ReaddNode(new_node)
3985       # make sure we redistribute the config
3986       self.cfg.Update(new_node, feedback_fn)
3987       # and make sure the new node will not have old files around
3988       if not new_node.master_candidate:
3989         result = self.rpc.call_node_demote_from_mc(new_node.name)
3990         msg = result.fail_msg
3991         if msg:
3992           self.LogWarning("Node failed to demote itself from master"
3993                           " candidate status: %s" % msg)
3994     else:
3995       _RedistributeAncillaryFiles(self, additional_nodes=[node],
3996                                   additional_vm=self.op.vm_capable)
3997       self.context.AddNode(new_node, self.proc.GetECId())
4000 class LUSetNodeParams(LogicalUnit):
4001   """Modifies the parameters of a node.
4003   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4004       to the node role (as _ROLE_*)
4005   @cvar _R2F: a dictionary from node role to tuples of flags
4006   @cvar _FLAGS: a list of attribute names corresponding to the flags
4008   """
4009   HPATH = "node-modify"
4010   HTYPE = constants.HTYPE_NODE
4011   _OP_PARAMS = [
4012     _PNodeName,
4013     ("master_candidate", None, ht.TMaybeBool),
4014     ("offline", None, ht.TMaybeBool),
4015     ("drained", None, ht.TMaybeBool),
4016     ("auto_promote", False, ht.TBool),
4017     ("master_capable", None, ht.TMaybeBool),
4018     ("vm_capable", None, ht.TMaybeBool),
4019     ("secondary_ip", None, ht.TMaybeString),
4020     _PForce,
4021     ]
4022   REQ_BGL = False
4023   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4024   _F2R = {
4025     (True, False, False): _ROLE_CANDIDATE,
4026     (False, True, False): _ROLE_DRAINED,
4027     (False, False, True): _ROLE_OFFLINE,
4028     (False, False, False): _ROLE_REGULAR,
4029     }
4030   _R2F = dict((v, k) for k, v in _F2R.items())
4031   _FLAGS = ["master_candidate", "drained", "offline"]
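  # Annotation (not in the original source): the flag tuples above are
  # ordered (master_candidate, drained, offline), so for example:
  #   _F2R[(True, False, False)] == _ROLE_CANDIDATE
  #   _R2F[_ROLE_OFFLINE] == (False, False, True)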
4033 def CheckArguments(self):
4034 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4035 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4036 self.op.master_capable, self.op.vm_capable,
4037 self.op.secondary_ip]
4038     if all_mods.count(None) == len(all_mods):
4039       raise errors.OpPrereqError("Please pass at least one modification",
4040                                  errors.ECODE_INVAL)
4041     if all_mods.count(True) > 1:
4042       raise errors.OpPrereqError("Can't set the node into more than one"
4043                                  " state at the same time",
4044                                  errors.ECODE_INVAL)
4046 # Boolean value that tells us whether we might be demoting from MC
4047 self.might_demote = (self.op.master_candidate == False or
4048 self.op.offline == True or
4049 self.op.drained == True or
4050 self.op.master_capable == False)
4052 if self.op.secondary_ip:
4053 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4054 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4055 " address" % self.op.secondary_ip,
4058 self.lock_all = self.op.auto_promote and self.might_demote
4059 self.lock_instances = self.op.secondary_ip is not None
4061   def ExpandNames(self):
4062     if self.lock_all:
4063       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4064     else:
4065       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4067 if self.lock_instances:
4068 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4070 def DeclareLocks(self, level):
4071 # If we have locked all instances, before waiting to lock nodes, release
4072 # all the ones living on nodes unrelated to the current operation.
4073 if level == locking.LEVEL_NODE and self.lock_instances:
4074       instances_release = []
4075       instances_keep = []
4076       self.affected_instances = []
4077       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4078         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4079           instance = self.context.cfg.GetInstanceInfo(instance_name)
4080           i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4081           if i_mirrored and self.op.node_name in instance.all_nodes:
4082             instances_keep.append(instance_name)
4083             self.affected_instances.append(instance)
4084           else:
4085             instances_release.append(instance_name)
4086         if instances_release:
4087           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4088           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4090   def BuildHooksEnv(self):
4091     """Build hooks env.
4093     This runs on the master node.
4095     """
4096     env = {
4097       "OP_TARGET": self.op.node_name,
4098       "MASTER_CANDIDATE": str(self.op.master_candidate),
4099       "OFFLINE": str(self.op.offline),
4100       "DRAINED": str(self.op.drained),
4101       "MASTER_CAPABLE": str(self.op.master_capable),
4102       "VM_CAPABLE": str(self.op.vm_capable),
4103       }
4104     nl = [self.cfg.GetMasterNode(),
4105           self.op.node_name]
4106     return env, nl, nl
4108 def CheckPrereq(self):
4109 """Check prerequisites.
4111 This only checks the instance list against the existing names.
4114 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4116 if (self.op.master_candidate is not None or
4117 self.op.drained is not None or
4118 self.op.offline is not None):
4119 # we can't change the master's node flags
4120 if self.op.node_name == self.cfg.GetMasterNode():
4121 raise errors.OpPrereqError("The master role can be changed"
4122 " only via master-failover",
4125 if self.op.master_candidate and not node.master_capable:
4126 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4127 " it a master candidate" % node.name,
4130     if self.op.vm_capable == False:
4131       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4132       if ipri or isec:
4133         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4134                                    " the vm_capable flag" % node.name,
4135                                    errors.ECODE_STATE)
4137 if node.master_candidate and self.might_demote and not self.lock_all:
4138 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4139 # check if after removing the current node, we're missing master
4141 (mc_remaining, mc_should, _) = \
4142 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4143 if mc_remaining < mc_should:
4144 raise errors.OpPrereqError("Not enough master candidates, please"
4145 " pass auto_promote to allow promotion",
4148 self.old_flags = old_flags = (node.master_candidate,
4149 node.drained, node.offline)
4150 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4151 self.old_role = old_role = self._F2R[old_flags]
4153 # Check for ineffective changes
4154 for attr in self._FLAGS:
4155 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4156 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4157 setattr(self.op, attr, None)
4159 # Past this point, any flag change to False means a transition
4160 # away from the respective state, as only real changes are kept
4162 # If we're being deofflined/drained, we'll MC ourself if needed
4163 if (self.op.drained == False or self.op.offline == False or
4164 (self.op.master_capable and not node.master_capable)):
4165 if _DecideSelfPromotion(self):
4166 self.op.master_candidate = True
4167 self.LogInfo("Auto-promoting node to master candidate")
4169 # If we're no longer master capable, we'll demote ourselves from MC
4170 if self.op.master_capable == False and node.master_candidate:
4171 self.LogInfo("Demoting from master candidate")
4172 self.op.master_candidate = False
4175 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4176 if self.op.master_candidate:
4177 new_role = self._ROLE_CANDIDATE
4178 elif self.op.drained:
4179 new_role = self._ROLE_DRAINED
4180 elif self.op.offline:
4181 new_role = self._ROLE_OFFLINE
4182     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4183       # False is still in new flags, which means we're un-setting (the
4184       # only) True flag
4185       new_role = self._ROLE_REGULAR
4186     else: # no new flags, nothing, keep old role
4187       new_role = old_role
4189     self.new_role = new_role
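    # Annotation (not in the original source): as an example of the logic
    # above, un-draining a drained node with no other flags passed ends up
    # as _ROLE_REGULAR, while passing offline=True from any role yields
    # _ROLE_OFFLINE.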
4191 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4192 # Trying to transition out of offline status
4193       result = self.rpc.call_version([node.name])[node.name]
4194       if result.fail_msg:
4195         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4196                                    " to report its version: %s" %
4197                                    (node.name, result.fail_msg),
4198                                    errors.ECODE_UNDERLYING)
4199       else:
4200         self.LogWarning("Transitioning node from offline to online state"
4201                         " without using re-add. Please make sure the node"
4202                         " is healthy!")
4204 if self.op.secondary_ip:
4205 # Ok even without locking, because this can't be changed by any LU
4206 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4207 master_singlehomed = master.secondary_ip == master.primary_ip
4208 if master_singlehomed and self.op.secondary_ip:
4209 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4210 " homed cluster", errors.ECODE_INVAL)
4212       if node.offline:
4213         if self.affected_instances:
4214           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4215                                      " node has instances (%s) configured"
4216                                      " to use it" % self.affected_instances)
4217       else:
4218         # On online nodes, check that no instances are running, and that
4219         # the node has the new ip and we can reach it.
4220 for instance in self.affected_instances:
4221 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4223 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4224 if master.name != node.name:
4225 # check reachability from master secondary ip to new secondary ip
4226 if not netutils.TcpPing(self.op.secondary_ip,
4227 constants.DEFAULT_NODED_PORT,
4228 source=master.secondary_ip):
4229 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4230 " based ping to node daemon port",
4231 errors.ECODE_ENVIRON)
4233   def Exec(self, feedback_fn):
4234     """Modifies a node.
4236     """
4237     node = self.node
4238     old_role = self.old_role
4239     new_role = self.new_role
4241     result = []
4243     for attr in ["master_capable", "vm_capable"]:
4244       val = getattr(self.op, attr)
4245       if val is not None:
4246         setattr(node, attr, val)
4247         result.append((attr, str(val)))
4249 if new_role != old_role:
4250 # Tell the node to demote itself, if no longer MC and not offline
4251 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4252 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4254 self.LogWarning("Node failed to demote itself: %s", msg)
4256       new_flags = self._R2F[new_role]
4257       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4258         if of != nf:
4259           result.append((desc, str(nf)))
4260       (node.master_candidate, node.drained, node.offline) = new_flags
4262       # we locked all nodes, we adjust the CP before updating this node
4263       if self.lock_all:
4264         _AdjustCandidatePool(self, [node.name])
4266 if self.op.secondary_ip:
4267 node.secondary_ip = self.op.secondary_ip
4268 result.append(("secondary_ip", self.op.secondary_ip))
4270 # this will trigger configuration file update, if needed
4271 self.cfg.Update(node, feedback_fn)
4273     # this will trigger job queue propagation or cleanup if the mc
4274     # flag changed
4275     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4276       self.context.ReaddNode(node)
4278     return result
4281 class LUPowercycleNode(NoHooksLU):
4282 """Powercycles a node.
4291 def CheckArguments(self):
4292 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4293 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4294 raise errors.OpPrereqError("The node is the master and the force"
4295 " parameter was not set",
4298 def ExpandNames(self):
4299 """Locking for PowercycleNode.
4301 This is a last-resort option and shouldn't block on other
4302 jobs. Therefore, we grab no locks.
4305 self.needed_locks = {}
4307 def Exec(self, feedback_fn):
4311 result = self.rpc.call_node_powercycle(self.op.node_name,
4312 self.cfg.GetHypervisorType())
4313 result.Raise("Failed to schedule the reboot")
4314 return result.payload
4317 class LUQueryClusterInfo(NoHooksLU):
4318 """Query cluster configuration.
4323 def ExpandNames(self):
4324 self.needed_locks = {}
4326 def Exec(self, feedback_fn):
4327 """Return cluster config.
4330 cluster = self.cfg.GetClusterInfo()
4332     os_hvp = {}
4333     # Filter just for enabled hypervisors
4334 for os_name, hv_dict in cluster.os_hvp.items():
4335 os_hvp[os_name] = {}
4336 for hv_name, hv_params in hv_dict.items():
4337 if hv_name in cluster.enabled_hypervisors:
4338 os_hvp[os_name][hv_name] = hv_params
4340 # Convert ip_family to ip_version
4341 primary_ip_version = constants.IP4_VERSION
4342 if cluster.primary_ip_family == netutils.IP6Address.family:
4343 primary_ip_version = constants.IP6_VERSION
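    # Annotation (not in the original source): this converts the socket
    # address family stored in the configuration (e.g. socket.AF_INET /
    # socket.AF_INET6) into the human-friendly version numbers 4 / 6.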
4346 "software_version": constants.RELEASE_VERSION,
4347 "protocol_version": constants.PROTOCOL_VERSION,
4348 "config_version": constants.CONFIG_VERSION,
4349 "os_api_version": max(constants.OS_API_VERSIONS),
4350 "export_version": constants.EXPORT_VERSION,
4351 "architecture": (platform.architecture()[0], platform.machine()),
4352 "name": cluster.cluster_name,
4353 "master": cluster.master_node,
4354 "default_hypervisor": cluster.enabled_hypervisors[0],
4355 "enabled_hypervisors": cluster.enabled_hypervisors,
4356 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4357                         for hypervisor_name in cluster.enabled_hypervisors]),
4358       "os_hvp": os_hvp,
4359       "beparams": cluster.beparams,
4360 "osparams": cluster.osparams,
4361 "nicparams": cluster.nicparams,
4362 "candidate_pool_size": cluster.candidate_pool_size,
4363 "master_netdev": cluster.master_netdev,
4364 "volume_group_name": cluster.volume_group_name,
4365 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4366 "file_storage_dir": cluster.file_storage_dir,
4367 "maintain_node_health": cluster.maintain_node_health,
4368 "ctime": cluster.ctime,
4369 "mtime": cluster.mtime,
4370 "uuid": cluster.uuid,
4371 "tags": list(cluster.GetTags()),
4372 "uid_pool": cluster.uid_pool,
4373 "default_iallocator": cluster.default_iallocator,
4374 "reserved_lvs": cluster.reserved_lvs,
4375 "primary_ip_version": primary_ip_version,
4376 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4382 class LUQueryConfigValues(NoHooksLU):
4383 """Return configuration values.
4386 _OP_PARAMS = [_POutputFields]
4388 _FIELDS_DYNAMIC = utils.FieldSet()
4389 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4390 "watcher_pause", "volume_group_name")
4392 def CheckArguments(self):
4393 _CheckOutputFields(static=self._FIELDS_STATIC,
4394 dynamic=self._FIELDS_DYNAMIC,
4395 selected=self.op.output_fields)
4397 def ExpandNames(self):
4398 self.needed_locks = {}
4400   def Exec(self, feedback_fn):
4401     """Dump a representation of the cluster config to the standard output.
4403     """
4404     values = []
4405     for field in self.op.output_fields:
4406 if field == "cluster_name":
4407 entry = self.cfg.GetClusterName()
4408 elif field == "master_node":
4409 entry = self.cfg.GetMasterNode()
4410 elif field == "drain_flag":
4411 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4412 elif field == "watcher_pause":
4413 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4414 elif field == "volume_group_name":
4415 entry = self.cfg.GetVGName()
4417 raise errors.ParameterError(field)
4418       values.append(entry)
4420     return values
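# Annotation (not part of the original module): a usage sketch. A query
# with output_fields=["cluster_name", "drain_flag"] returns the values in
# the requested order, e.g. ["cluster.example.com", False]; the names here
# are placeholders.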
4422 class LUActivateInstanceDisks(NoHooksLU):
4423 """Bring up an instance's disks.
4428 ("ignore_size", False, ht.TBool),
4432 def ExpandNames(self):
4433 self._ExpandAndLockInstance()
4434 self.needed_locks[locking.LEVEL_NODE] = []
4435 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4437 def DeclareLocks(self, level):
4438 if level == locking.LEVEL_NODE:
4439 self._LockInstancesNodes()
4441 def CheckPrereq(self):
4442 """Check prerequisites.
4444 This checks that the instance is in the cluster.
4447 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4448 assert self.instance is not None, \
4449 "Cannot retrieve locked instance %s" % self.op.instance_name
4450 _CheckNodeOnline(self, self.instance.primary_node)
4452 def Exec(self, feedback_fn):
4453 """Activate the disks.
4456     disks_ok, disks_info = \
4457       _AssembleInstanceDisks(self, self.instance,
4458                              ignore_size=self.op.ignore_size)
4459     if not disks_ok:
4460       raise errors.OpExecError("Cannot activate block devices")
4462     return disks_info
4465 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4466                            ignore_size=False):
4467   """Prepare the block devices for an instance.
4469 This sets up the block devices on all nodes.
4471 @type lu: L{LogicalUnit}
4472 @param lu: the logical unit on whose behalf we execute
4473 @type instance: L{objects.Instance}
4474 @param instance: the instance for whose disks we assemble
4475 @type disks: list of L{objects.Disk} or None
4476 @param disks: which disks to assemble (or all, if None)
4477 @type ignore_secondaries: boolean
4478 @param ignore_secondaries: if true, errors on secondary nodes
4479 won't result in an error return from the function
4480 @type ignore_size: boolean
4481 @param ignore_size: if true, the current known size of the disk
4482 will not be used during the disk activation, useful for cases
4483 when the size is wrong
4484 @return: False if the operation failed, otherwise a list of
4485 (host, instance_visible_name, node_visible_name)
4486       with the mapping from node devices to instance devices
4488   """
4489   device_info = []
4490   disks_ok = True
4491   iname = instance.name
4492   disks = _ExpandCheckDisks(instance, disks)
4494   # With the two passes mechanism we try to reduce the window of
4495   # opportunity for the race condition of switching DRBD to primary
4496   # before handshaking occurred, but we do not eliminate it
4498   # The proper fix would be to wait (with some limits) until the
4499   # connection has been made and drbd transitions from WFConnection
4500   # into any other network-connected state (Connected, SyncTarget,
4501   # SyncSource, etc.)
4503   # 1st pass, assemble on all nodes in secondary mode
4504   for inst_disk in disks:
4505     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4506       if ignore_size:
4507         node_disk = node_disk.Copy()
4508         node_disk.UnsetSize()
4509       lu.cfg.SetDiskID(node_disk, node)
4510       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4511       msg = result.fail_msg
4512       if msg:
4513         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4514                            " (is_primary=False, pass=1): %s",
4515                            inst_disk.iv_name, node, msg)
4516         if not ignore_secondaries:
4517           disks_ok = False
4519 # FIXME: race condition on drbd migration to primary
4521 # 2nd pass, do only the primary node
4522   for inst_disk in disks:
4523     dev_path = None
4525     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4526       if node != instance.primary_node:
4527         continue
4528       if ignore_size:
4529         node_disk = node_disk.Copy()
4530         node_disk.UnsetSize()
4531       lu.cfg.SetDiskID(node_disk, node)
4532       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4533       msg = result.fail_msg
4534       if msg:
4535         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4536                            " (is_primary=True, pass=2): %s",
4537                            inst_disk.iv_name, node, msg)
4538         disks_ok = False
4539       else:
4540         dev_path = result.payload
4542     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4544   # leave the disks configured for the primary node
4545   # this is a workaround that would be fixed better by
4546   # improving the logical/physical id handling
4547   for disk in disks:
4548     lu.cfg.SetDiskID(disk, instance.primary_node)
4550   return disks_ok, device_info
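# Annotation (not part of the original module): a sketch of how a caller
# typically consumes the return value of _AssembleInstanceDisks; the
# feedback_fn formatting is illustrative.
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s: disk %s assembled at %s" % (node, iv_name, dev_path))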
4553 def _StartInstanceDisks(lu, instance, force):
4554 """Start the disks of an instance.
4557   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4558                                        ignore_secondaries=force)
4559   if not disks_ok:
4560     _ShutdownInstanceDisks(lu, instance)
4561     if force is not None and not force:
4562       lu.proc.LogWarning("", hint="If the message above refers to a"
4563                          " secondary node,"
4564                          " you can retry the operation using '--force'.")
4565     raise errors.OpExecError("Disk consistency error")
4568 class LUDeactivateInstanceDisks(NoHooksLU):
4569 """Shutdown an instance's disks.
4577 def ExpandNames(self):
4578 self._ExpandAndLockInstance()
4579 self.needed_locks[locking.LEVEL_NODE] = []
4580 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4582 def DeclareLocks(self, level):
4583 if level == locking.LEVEL_NODE:
4584 self._LockInstancesNodes()
4586 def CheckPrereq(self):
4587 """Check prerequisites.
4589 This checks that the instance is in the cluster.
4592 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4593 assert self.instance is not None, \
4594 "Cannot retrieve locked instance %s" % self.op.instance_name
4596 def Exec(self, feedback_fn):
4597 """Deactivate the disks
4600 instance = self.instance
4601 _SafeShutdownInstanceDisks(self, instance)
4604 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4605 """Shutdown block devices of an instance.
4607 This function checks if an instance is running, before calling
4608 _ShutdownInstanceDisks.
4611 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4612 _ShutdownInstanceDisks(lu, instance, disks=disks)
4615 def _ExpandCheckDisks(instance, disks):
4616 """Return the instance disks selected by the disks list
4618 @type disks: list of L{objects.Disk} or None
4619 @param disks: selected disks
4620 @rtype: list of L{objects.Disk}
4621   @return: selected instance disks to act on
4623   """
4624   if disks is None:
4625     return instance.disks
4626   else:
4627     if not set(disks).issubset(instance.disks):
4628       raise errors.ProgrammerError("Can only act on disks belonging to the"
4629                                    " target instance: expected a subset of %r,"
4630                                    " got %r" % (instance.disks, disks))
4631     return disks
4633 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4634 """Shutdown block devices of an instance.
4636   This does the shutdown on all nodes of the instance.
4638   If ignore_primary is false, errors on the primary node are
4639   ignored.
4641   """
4642   all_result = True
4643   disks = _ExpandCheckDisks(instance, disks)
4645   for disk in disks:
4646     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4647       lu.cfg.SetDiskID(top_disk, node)
4648       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4649       msg = result.fail_msg
4650       if msg:
4651         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4652                       disk.iv_name, node, msg)
4653         if not ignore_primary or node != instance.primary_node:
4654           all_result = False
4656   return all_result
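# Annotation (not part of the original module): a sketch of shutting down
# only the first disk of an instance while tolerating primary-node errors.
#
#   if not _ShutdownInstanceDisks(lu, instance, disks=instance.disks[:1],
#                                 ignore_primary=True):
#     lu.LogWarning("Some block devices could not be shut down")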
4658 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4659 """Checks if a node has enough free memory.
4661   This function checks if a given node has the needed amount of free
4662   memory. In case the node has less memory or we cannot get the
4663   information from the node, this function raises an OpPrereqError
4664   exception.
4666 @type lu: C{LogicalUnit}
4667 @param lu: a logical unit from which we get configuration data
4668   @type node: C{str}
4669   @param node: the node to check
4670 @type reason: C{str}
4671 @param reason: string to use in the error message
4672 @type requested: C{int}
4673 @param requested: the amount of memory in MiB to check for
4674 @type hypervisor_name: C{str}
4675 @param hypervisor_name: the hypervisor to ask for memory stats
4676 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4677 we cannot check the node
4680 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4681 nodeinfo[node].Raise("Can't get data from node %s" % node,
4682 prereq=True, ecode=errors.ECODE_ENVIRON)
4683 free_mem = nodeinfo[node].payload.get('memory_free', None)
4684 if not isinstance(free_mem, int):
4685 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4686 " was '%s'" % (node, free_mem),
4687 errors.ECODE_ENVIRON)
4688 if requested > free_mem:
4689 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4690 " needed %s MiB, available %s MiB" %
4691 (node, reason, requested, free_mem),
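# Annotation (not part of the original module): a usage sketch; the node
# name and the 1024 MiB figure are placeholders.
#
#   _CheckNodeFreeMemory(self, "node2.example.com",
#                        "failing over instance %s" % instance.name,
#                        1024, instance.hypervisor)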
4695 def _CheckNodesFreeDisk(lu, nodenames, requested):
4696 """Checks if nodes have enough free disk space in the default VG.
4698   This function checks if all given nodes have the needed amount of
4699   free disk. In case any node has less disk or we cannot get the
4700   information from the node, this function raises an OpPrereqError
4701   exception.
4703 @type lu: C{LogicalUnit}
4704 @param lu: a logical unit from which we get configuration data
4705 @type nodenames: C{list}
4706 @param nodenames: the list of node names to check
4707 @type requested: C{int}
4708 @param requested: the amount of disk in MiB to check for
4709 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4710 we cannot check the node
4713 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4714 lu.cfg.GetHypervisorType())
4715 for node in nodenames:
4716 info = nodeinfo[node]
4717 info.Raise("Cannot get current information from node %s" % node,
4718 prereq=True, ecode=errors.ECODE_ENVIRON)
4719 vg_free = info.payload.get("vg_free", None)
4720 if not isinstance(vg_free, int):
4721 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4722 " result was '%s'" % (node, vg_free),
4723 errors.ECODE_ENVIRON)
4724 if requested > vg_free:
4725 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4726 " required %d MiB, available %d MiB" %
4727 (node, requested, vg_free),
4731 class LUStartupInstance(LogicalUnit):
4732 """Starts an instance.
4735 HPATH = "instance-start"
4736 HTYPE = constants.HTYPE_INSTANCE
4737   _OP_PARAMS = [
4738     _PInstanceName,
4739     _PForce,
4740     _PIgnoreOfflineNodes,
4741     ("hvparams", ht.EmptyDict, ht.TDict),
4742     ("beparams", ht.EmptyDict, ht.TDict),
4743     ]
4744   REQ_BGL = False
4746 def CheckArguments(self):
4748 if self.op.beparams:
4749 # fill the beparams dict
4750 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4752 def ExpandNames(self):
4753 self._ExpandAndLockInstance()
4755   def BuildHooksEnv(self):
4756     """Build hooks env.
4758     This runs on master, primary and secondary nodes of the instance.
4760     """
4761     env = {
4762       "FORCE": self.op.force,
4763       }
4764     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4765     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4766     return env, nl, nl
4768 def CheckPrereq(self):
4769 """Check prerequisites.
4771 This checks that the instance is in the cluster.
4774 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4775 assert self.instance is not None, \
4776 "Cannot retrieve locked instance %s" % self.op.instance_name
4779 if self.op.hvparams:
4780 # check hypervisor parameter syntax (locally)
4781 cluster = self.cfg.GetClusterInfo()
4782 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4783 filled_hvp = cluster.FillHV(instance)
4784 filled_hvp.update(self.op.hvparams)
4785 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4786 hv_type.CheckParameterSyntax(filled_hvp)
4787 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4789 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4791     if self.primary_offline and self.op.ignore_offline_nodes:
4792       self.proc.LogWarning("Ignoring offline primary node")
4794       if self.op.hvparams or self.op.beparams:
4795         self.proc.LogWarning("Overridden parameters are ignored")
4796     else:
4797       _CheckNodeOnline(self, instance.primary_node)
4799 bep = self.cfg.GetClusterInfo().FillBE(instance)
4801 # check bridges existence
4802 _CheckInstanceBridgesExist(self, instance)
4804     remote_info = self.rpc.call_instance_info(instance.primary_node,
4805                                               instance.name,
4806                                               instance.hypervisor)
4807 remote_info.Raise("Error checking node %s" % instance.primary_node,
4808 prereq=True, ecode=errors.ECODE_ENVIRON)
4809 if not remote_info.payload: # not running already
4810 _CheckNodeFreeMemory(self, instance.primary_node,
4811 "starting instance %s" % instance.name,
4812 bep[constants.BE_MEMORY], instance.hypervisor)
4814 def Exec(self, feedback_fn):
4815 """Start the instance.
4818 instance = self.instance
4819 force = self.op.force
4821     self.cfg.MarkInstanceUp(instance.name)
4823     if self.primary_offline:
4824       assert self.op.ignore_offline_nodes
4825       self.proc.LogInfo("Primary node offline, marked instance as started")
4826     else:
4827       node_current = instance.primary_node
4829       _StartInstanceDisks(self, instance, force)
4831       result = self.rpc.call_instance_start(node_current, instance,
4832                                             self.op.hvparams, self.op.beparams)
4833       msg = result.fail_msg
4834       if msg:
4835         _ShutdownInstanceDisks(self, instance)
4836         raise errors.OpExecError("Could not start instance: %s" % msg)
4839 class LURebootInstance(LogicalUnit):
4840 """Reboot an instance.
4843 HPATH = "instance-reboot"
4844 HTYPE = constants.HTYPE_INSTANCE
4847 ("ignore_secondaries", False, ht.TBool),
4848 ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
4853 def ExpandNames(self):
4854 self._ExpandAndLockInstance()
4856 def BuildHooksEnv(self):
4859 This runs on master, primary and secondary nodes of the instance.
4863 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4864 "REBOOT_TYPE": self.op.reboot_type,
4865 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4867 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4868 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4871 def CheckPrereq(self):
4872 """Check prerequisites.
4874 This checks that the instance is in the cluster.
4877 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4878 assert self.instance is not None, \
4879 "Cannot retrieve locked instance %s" % self.op.instance_name
4881 _CheckNodeOnline(self, instance.primary_node)
4883 # check bridges existence
4884 _CheckInstanceBridgesExist(self, instance)
4886 def Exec(self, feedback_fn):
4887 """Reboot the instance.
4890 instance = self.instance
4891 ignore_secondaries = self.op.ignore_secondaries
4892 reboot_type = self.op.reboot_type
4894 node_current = instance.primary_node
4896 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4897 constants.INSTANCE_REBOOT_HARD]:
4898 for disk in instance.disks:
4899 self.cfg.SetDiskID(disk, node_current)
4900       result = self.rpc.call_instance_reboot(node_current, instance,
4901                                              reboot_type,
4902                                              self.op.shutdown_timeout)
4903       result.Raise("Could not reboot instance")
4904     else:
4905       result = self.rpc.call_instance_shutdown(node_current, instance,
4906                                                self.op.shutdown_timeout)
4907       result.Raise("Could not shutdown instance for full reboot")
4908 _ShutdownInstanceDisks(self, instance)
4909 _StartInstanceDisks(self, instance, ignore_secondaries)
4910 result = self.rpc.call_instance_start(node_current, instance, None, None)
4911       msg = result.fail_msg
4912       if msg:
4913         _ShutdownInstanceDisks(self, instance)
4914         raise errors.OpExecError("Could not start instance for"
4915                                  " full reboot: %s" % msg)
4917 self.cfg.MarkInstanceUp(instance.name)
4920 class LUShutdownInstance(LogicalUnit):
4921 """Shutdown an instance.
4924 HPATH = "instance-stop"
4925 HTYPE = constants.HTYPE_INSTANCE
4928 _PIgnoreOfflineNodes,
4929 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
4933 def ExpandNames(self):
4934 self._ExpandAndLockInstance()
4936 def BuildHooksEnv(self):
4939 This runs on master, primary and secondary nodes of the instance.
4942 env = _BuildInstanceHookEnvByObject(self, self.instance)
4943 env["TIMEOUT"] = self.op.timeout
4944 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4947 def CheckPrereq(self):
4948 """Check prerequisites.
4950 This checks that the instance is in the cluster.
4953 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4954 assert self.instance is not None, \
4955 "Cannot retrieve locked instance %s" % self.op.instance_name
4957 self.primary_offline = \
4958 self.cfg.GetNodeInfo(self.instance.primary_node).offline
4960     if self.primary_offline and self.op.ignore_offline_nodes:
4961       self.proc.LogWarning("Ignoring offline primary node")
4962     else:
4963       _CheckNodeOnline(self, self.instance.primary_node)
4965 def Exec(self, feedback_fn):
4966 """Shutdown the instance.
4969 instance = self.instance
4970 node_current = instance.primary_node
4971 timeout = self.op.timeout
4973 self.cfg.MarkInstanceDown(instance.name)
4975     if self.primary_offline:
4976       assert self.op.ignore_offline_nodes
4977       self.proc.LogInfo("Primary node offline, marked instance as stopped")
4978     else:
4979       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4980       msg = result.fail_msg
4981       if msg:
4982         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4984       _ShutdownInstanceDisks(self, instance)
4987 class LUReinstallInstance(LogicalUnit):
4988 """Reinstall an instance.
4991 HPATH = "instance-reinstall"
4992 HTYPE = constants.HTYPE_INSTANCE
4995 ("os_type", None, ht.TMaybeString),
4996 ("force_variant", False, ht.TBool),
4997 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
5001 def ExpandNames(self):
5002 self._ExpandAndLockInstance()
5004 def BuildHooksEnv(self):
5007 This runs on master, primary and secondary nodes of the instance.
5010 env = _BuildInstanceHookEnvByObject(self, self.instance)
5011 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5014 def CheckPrereq(self):
5015 """Check prerequisites.
5017 This checks that the instance is in the cluster and is not running.
5020 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5021 assert instance is not None, \
5022 "Cannot retrieve locked instance %s" % self.op.instance_name
5023 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5024 " offline, cannot reinstall")
5025 for node in instance.secondary_nodes:
5026 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5027 " cannot reinstall")
5029 if instance.disk_template == constants.DT_DISKLESS:
5030 raise errors.OpPrereqError("Instance '%s' has no disks" %
5031 self.op.instance_name,
5033 _CheckInstanceDown(self, instance, "cannot reinstall")
5035     if self.op.os_type is not None:
5036       # OS verification
5037       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5038       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5039       instance_os = self.op.os_type
5040     else:
5041       instance_os = instance.os
5043 nodelist = list(instance.all_nodes)
5045     if self.op.osparams:
5046       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5047       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5048       self.os_inst = i_osdict # the new dict (without defaults)
5049     else:
5050       self.os_inst = {}
5052     self.instance = instance
5054 def Exec(self, feedback_fn):
5055 """Reinstall the instance.
5058 inst = self.instance
5060 if self.op.os_type is not None:
5061 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5062 inst.os = self.op.os_type
5063 # Write to configuration
5064 self.cfg.Update(inst, feedback_fn)
5066     _StartInstanceDisks(self, inst, None)
5067     try:
5068       feedback_fn("Running the instance OS create scripts...")
5069       # FIXME: pass debug option from opcode to backend
5070       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5071                                              self.op.debug_level,
5072                                              osparams=self.os_inst)
5073       result.Raise("Could not install OS for instance %s on node %s" %
5074                    (inst.name, inst.primary_node))
5075     finally:
5076       _ShutdownInstanceDisks(self, inst)
5079 class LURecreateInstanceDisks(LogicalUnit):
5080 """Recreate an instance's missing disks.
5083 HPATH = "instance-recreate-disks"
5084 HTYPE = constants.HTYPE_INSTANCE
5087 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
5091 def ExpandNames(self):
5092 self._ExpandAndLockInstance()
5094 def BuildHooksEnv(self):
5097 This runs on master, primary and secondary nodes of the instance.
5100 env = _BuildInstanceHookEnvByObject(self, self.instance)
5101 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5104 def CheckPrereq(self):
5105 """Check prerequisites.
5107 This checks that the instance is in the cluster and is not running.
5110 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5111 assert instance is not None, \
5112 "Cannot retrieve locked instance %s" % self.op.instance_name
5113 _CheckNodeOnline(self, instance.primary_node)
5115 if instance.disk_template == constants.DT_DISKLESS:
5116 raise errors.OpPrereqError("Instance '%s' has no disks" %
5117 self.op.instance_name, errors.ECODE_INVAL)
5118 _CheckInstanceDown(self, instance, "cannot recreate disks")
5120     if not self.op.disks:
5121       self.op.disks = range(len(instance.disks))
5122     else:
5123       for idx in self.op.disks:
5124         if idx >= len(instance.disks):
5125           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5126                                      errors.ECODE_INVAL)
5128     self.instance = instance
5130 def Exec(self, feedback_fn):
5131 """Recreate the disks.
5135 for idx, _ in enumerate(self.instance.disks):
5136 if idx not in self.op.disks: # disk idx has not been passed in
5140 _CreateDisks(self, self.instance, to_skip=to_skip)
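# Annotation (not part of the original module): a sketch of recreating only
# the first disk of an instance; per CheckPrereq above, an empty "disks"
# list means all disks. The opcode class name is an assumption.
#
#   op = opcodes.OpRecreateInstanceDisks(instance_name="inst1.example.com",
#                                        disks=[0])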
5143 class LURenameInstance(LogicalUnit):
5144 """Rename an instance.
5147 HPATH = "instance-rename"
5148 HTYPE = constants.HTYPE_INSTANCE
5151 ("new_name", ht.NoDefault, ht.TNonEmptyString),
5152 ("ip_check", False, ht.TBool),
5153 ("name_check", True, ht.TBool),
5156 def CheckArguments(self):
5160 if self.op.ip_check and not self.op.name_check:
5161 # TODO: make the ip check more flexible and not depend on the name check
5162 raise errors.OpPrereqError("Cannot do ip check without a name check",
5165 def BuildHooksEnv(self):
5168 This runs on master, primary and secondary nodes of the instance.
5171 env = _BuildInstanceHookEnvByObject(self, self.instance)
5172 env["INSTANCE_NEW_NAME"] = self.op.new_name
5173 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5176 def CheckPrereq(self):
5177 """Check prerequisites.
5179 This checks that the instance is in the cluster and is not running.
5182 self.op.instance_name = _ExpandInstanceName(self.cfg,
5183 self.op.instance_name)
5184 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5185 assert instance is not None
5186 _CheckNodeOnline(self, instance.primary_node)
5187 _CheckInstanceDown(self, instance, "cannot rename")
5188 self.instance = instance
5190 new_name = self.op.new_name
5191 if self.op.name_check:
5192 hostname = netutils.GetHostname(name=new_name)
5193 new_name = self.op.new_name = hostname.name
5194 if (self.op.ip_check and
5195 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5196 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5197 (hostname.ip, new_name),
5198 errors.ECODE_NOTUNIQUE)
5200 instance_list = self.cfg.GetInstanceList()
5201 if new_name in instance_list:
5202 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5203 new_name, errors.ECODE_EXISTS)
5205 def Exec(self, feedback_fn):
5206 """Reinstall the instance.
5209 inst = self.instance
5210 old_name = inst.name
5212 if inst.disk_template == constants.DT_FILE:
5213 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5215 self.cfg.RenameInstance(inst.name, self.op.new_name)
5216 # Change the instance lock. This is definitely safe while we hold the BGL
5217 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5218 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5220 # re-read the instance from the configuration after rename
5221 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5223 if inst.disk_template == constants.DT_FILE:
5224 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5225 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5226 old_file_storage_dir,
5227 new_file_storage_dir)
5228 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5229 " (but the instance has been renamed in Ganeti)" %
5230 (inst.primary_node, old_file_storage_dir,
5231 new_file_storage_dir))
5233     _StartInstanceDisks(self, inst, None)
5234     try:
5235       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5236                                                  old_name, self.op.debug_level)
5237       msg = result.fail_msg
5238       if msg:
5239         msg = ("Could not run OS rename script for instance %s on node %s"
5240                " (but the instance has been renamed in Ganeti): %s" %
5241                (inst.name, inst.primary_node, msg))
5242         self.proc.LogWarning(msg)
5243     finally:
5244       _ShutdownInstanceDisks(self, inst)
5246     return inst.name
5249 class LURemoveInstance(LogicalUnit):
5250 """Remove an instance.
5253 HPATH = "instance-remove"
5254 HTYPE = constants.HTYPE_INSTANCE
5257 ("ignore_failures", False, ht.TBool),
5262 def ExpandNames(self):
5263 self._ExpandAndLockInstance()
5264 self.needed_locks[locking.LEVEL_NODE] = []
5265 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5267 def DeclareLocks(self, level):
5268 if level == locking.LEVEL_NODE:
5269 self._LockInstancesNodes()
5271 def BuildHooksEnv(self):
5274 This runs on master, primary and secondary nodes of the instance.
5277 env = _BuildInstanceHookEnvByObject(self, self.instance)
5278 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5279 nl = [self.cfg.GetMasterNode()]
5280 nl_post = list(self.instance.all_nodes) + nl
5281 return env, nl, nl_post
5283 def CheckPrereq(self):
5284 """Check prerequisites.
5286 This checks that the instance is in the cluster.
5289 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5290 assert self.instance is not None, \
5291 "Cannot retrieve locked instance %s" % self.op.instance_name
5293 def Exec(self, feedback_fn):
5294 """Remove the instance.
5297 instance = self.instance
5298 logging.info("Shutting down instance %s on node %s",
5299 instance.name, instance.primary_node)
5301 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5302 self.op.shutdown_timeout)
5303     msg = result.fail_msg
5304     if msg:
5305       if self.op.ignore_failures:
5306         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5307       else:
5308         raise errors.OpExecError("Could not shutdown instance %s on"
5309                                  " node %s: %s" %
5310                                  (instance.name, instance.primary_node, msg))
5312 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5315 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5316 """Utility function to remove an instance.
5319 logging.info("Removing block devices for instance %s", instance.name)
5321 if not _RemoveDisks(lu, instance):
5322 if not ignore_failures:
5323 raise errors.OpExecError("Can't remove instance's disks")
5324 feedback_fn("Warning: can't remove instance's disks")
5326 logging.info("Removing instance %s out of cluster config", instance.name)
5328 lu.cfg.RemoveInstance(instance.name)
5330 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5331 "Instance lock removal conflict"
5333 # Remove lock for the instance
5334 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5337 class LUQueryInstances(NoHooksLU):
5338 """Logical unit for querying instances.
5341 # pylint: disable-msg=W0142
5343 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
5344 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5345 ("use_locking", False, ht.TBool),
5348 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5349 "serial_no", "ctime", "mtime", "uuid"]
5350 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5352 "disk_template", "ip", "mac", "bridge",
5353 "nic_mode", "nic_link",
5354 "sda_size", "sdb_size", "vcpus", "tags",
5355 "network_port", "beparams",
5356 r"(disk)\.(size)/([0-9]+)",
5357 r"(disk)\.(sizes)", "disk_usage",
5358 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5359 r"(nic)\.(bridge)/([0-9]+)",
5360 r"(nic)\.(macs|ips|modes|links|bridges)",
5361 r"(disk|nic)\.(count)",
5362 "hvparams", "custom_hvparams",
5363 "custom_beparams", "custom_nicparams",
5364                                     ] + _SIMPLE_FIELDS +
5365                                    ["hv/%s" % name
5366                                     for name in constants.HVS_PARAMETERS
5367                                     if name not in constants.HVC_GLOBALS] +
5368                                    ["be/%s" % name
5369                                     for name in constants.BES_PARAMETERS])
5370 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5376 def CheckArguments(self):
5377 _CheckOutputFields(static=self._FIELDS_STATIC,
5378 dynamic=self._FIELDS_DYNAMIC,
5379 selected=self.op.output_fields)
5381 def ExpandNames(self):
5382 self.needed_locks = {}
5383 self.share_locks[locking.LEVEL_INSTANCE] = 1
5384 self.share_locks[locking.LEVEL_NODE] = 1
5386     if self.op.names:
5387       self.wanted = _GetWantedInstances(self, self.op.names)
5388     else:
5389       self.wanted = locking.ALL_SET
5391     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5392     self.do_locking = self.do_node_query and self.op.use_locking
5393     if self.do_locking:
5394       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5395       self.needed_locks[locking.LEVEL_NODE] = []
5396       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5398 def DeclareLocks(self, level):
5399 if level == locking.LEVEL_NODE and self.do_locking:
5400 self._LockInstancesNodes()
5402 def Exec(self, feedback_fn):
5403 """Computes the list of nodes and their attributes.
5406 # pylint: disable-msg=R0912
5407 # way too many branches here
5408 all_info = self.cfg.GetAllInstancesInfo()
5409     if self.wanted == locking.ALL_SET:
5410       # caller didn't specify instance names, so ordering is not important
5411       if self.do_locking:
5412         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5413       else:
5414         instance_names = all_info.keys()
5415       instance_names = utils.NiceSort(instance_names)
5416     else:
5417       # caller did specify names, so we must keep the ordering
5418       if self.do_locking:
5419         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5420       else:
5421         tgt_set = all_info.keys()
5422       missing = set(self.wanted).difference(tgt_set)
5423       if missing:
5424         raise errors.OpExecError("Some instances were removed before"
5425                                  " retrieving their data: %s" % missing)
5426       instance_names = self.wanted
5428 instance_list = [all_info[iname] for iname in instance_names]
5430 # begin data gathering
5432     nodes = frozenset([inst.primary_node for inst in instance_list])
5433     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5435     bad_nodes = []
5436     off_nodes = []
5437     if self.do_node_query:
5438       live_data = {}
5439       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5440       for name in nodes:
5441         result = node_data[name]
5442         if result.offline:
5443           # offline nodes will be in both lists
5444           off_nodes.append(name)
5445         if result.fail_msg:
5446           bad_nodes.append(name)
5448         elif result.payload:
5449           live_data.update(result.payload)
5450         # else no instance is alive
5451     else:
5452       live_data = dict([(name, {}) for name in instance_names])
5454     # end data gathering
5456     HVPREFIX = "hv/"
5457     BEPREFIX = "be/"
5458     output = []
5459     cluster = self.cfg.GetClusterInfo()
5460     for instance in instance_list:
5461       iout = []
5462 i_hv = cluster.FillHV(instance, skip_globals=True)
5463 i_be = cluster.FillBE(instance)
5464 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5465 for field in self.op.output_fields:
5466 st_match = self._FIELDS_STATIC.Matches(field)
5467 if field in self._SIMPLE_FIELDS:
5468 val = getattr(instance, field)
5469 elif field == "pnode":
5470 val = instance.primary_node
5471 elif field == "snodes":
5472 val = list(instance.secondary_nodes)
5473 elif field == "admin_state":
5474 val = instance.admin_up
5475 elif field == "oper_state":
5476 if instance.primary_node in bad_nodes:
5479 val = bool(live_data.get(instance.name))
5480 elif field == "status":
5481 if instance.primary_node in off_nodes:
5482 val = "ERROR_nodeoffline"
5483 elif instance.primary_node in bad_nodes:
5484 val = "ERROR_nodedown"
5486 running = bool(live_data.get(instance.name))
5488 if instance.admin_up:
5493 if instance.admin_up:
5497 elif field == "oper_ram":
5498 if instance.primary_node in bad_nodes:
5500 elif instance.name in live_data:
5501 val = live_data[instance.name].get("memory", "?")
5504 elif field == "oper_vcpus":
5505 if instance.primary_node in bad_nodes:
5507 elif instance.name in live_data:
5508 val = live_data[instance.name].get("vcpus", "?")
5511 elif field == "vcpus":
5512 val = i_be[constants.BE_VCPUS]
5513 elif field == "disk_template":
5514 val = instance.disk_template
5515         elif field == "ip":
5516           if instance.nics:
5517             val = instance.nics[0].ip
5518           else:
5519             val = None
5520         elif field == "nic_mode":
5521           if instance.nics:
5522             val = i_nicp[0][constants.NIC_MODE]
5523           else:
5524             val = None
5525         elif field == "nic_link":
5526           if instance.nics:
5527             val = i_nicp[0][constants.NIC_LINK]
5528           else:
5529             val = None
5530         elif field == "bridge":
5531           if (instance.nics and
5532               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5533             val = i_nicp[0][constants.NIC_LINK]
5534           else:
5535             val = None
5536         elif field == "mac":
5537           if instance.nics:
5538             val = instance.nics[0].mac
5539           else:
5540             val = None
5541         elif field == "custom_nicparams":
5542           val = [nic.nicparams for nic in instance.nics]
5543         elif field == "sda_size" or field == "sdb_size":
5544           idx = ord(field[2]) - ord('a')
5545           try:
5546             val = instance.FindDisk(idx).size
5547           except errors.OpPrereqError:
5548             val = None
5549 elif field == "disk_usage": # total disk usage per node
5550 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5551 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5552 elif field == "tags":
5553 val = list(instance.GetTags())
5554 elif field == "custom_hvparams":
5555 val = instance.hvparams # not filled!
5556 elif field == "hvparams":
5558 elif (field.startswith(HVPREFIX) and
5559 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5560 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5561 val = i_hv.get(field[len(HVPREFIX):], None)
5562 elif field == "custom_beparams":
5563 val = instance.beparams
5564 elif field == "beparams":
5566 elif (field.startswith(BEPREFIX) and
5567 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5568 val = i_be.get(field[len(BEPREFIX):], None)
5569 elif st_match and st_match.groups():
5570 # matches a variable list
5571 st_groups = st_match.groups()
5572 if st_groups and st_groups[0] == "disk":
5573 if st_groups[1] == "count":
5574 val = len(instance.disks)
5575 elif st_groups[1] == "sizes":
5576               val = [disk.size for disk in instance.disks]
5577             elif st_groups[1] == "size":
5578               try:
5579                 val = instance.FindDisk(st_groups[2]).size
5580               except errors.OpPrereqError:
5581                 val = None
5582             else:
5583               assert False, "Unhandled disk parameter"
5584 elif st_groups[0] == "nic":
5585 if st_groups[1] == "count":
5586 val = len(instance.nics)
5587 elif st_groups[1] == "macs":
5588 val = [nic.mac for nic in instance.nics]
5589 elif st_groups[1] == "ips":
5590 val = [nic.ip for nic in instance.nics]
5591 elif st_groups[1] == "modes":
5592 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5593 elif st_groups[1] == "links":
5594 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5595 elif st_groups[1] == "bridges":
5598 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5599 val.append(nicp[constants.NIC_LINK])
5604 nic_idx = int(st_groups[2])
5605 if nic_idx >= len(instance.nics):
5608 if st_groups[1] == "mac":
5609 val = instance.nics[nic_idx].mac
5610 elif st_groups[1] == "ip":
5611 val = instance.nics[nic_idx].ip
5612 elif st_groups[1] == "mode":
5613 val = i_nicp[nic_idx][constants.NIC_MODE]
5614 elif st_groups[1] == "link":
5615 val = i_nicp[nic_idx][constants.NIC_LINK]
5616 elif st_groups[1] == "bridge":
5617 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5618                 if nic_mode == constants.NIC_MODE_BRIDGED:
5619                   val = i_nicp[nic_idx][constants.NIC_LINK]
5620                 else:
5621                   val = None
5622               else:
5623                 assert False, "Unhandled NIC parameter"
5624         else:
5625           assert False, ("Declared but unhandled variable parameter '%s'" %
5626                          field)
5627         iout.append(val)
5628       output.append(iout)
5630     return output
5635 class LUFailoverInstance(LogicalUnit):
5636 """Failover an instance.
5639 HPATH = "instance-failover"
5640 HTYPE = constants.HTYPE_INSTANCE
5643 ("ignore_consistency", False, ht.TBool),
5648 def ExpandNames(self):
5649 self._ExpandAndLockInstance()
5650 self.needed_locks[locking.LEVEL_NODE] = []
5651 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5653 def DeclareLocks(self, level):
5654 if level == locking.LEVEL_NODE:
5655 self._LockInstancesNodes()
5657 def BuildHooksEnv(self):
5660 This runs on master, primary and secondary nodes of the instance.
5663 instance = self.instance
5664 source_node = instance.primary_node
5665 target_node = instance.secondary_nodes[0]
5667 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5668 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5669 "OLD_PRIMARY": source_node,
5670 "OLD_SECONDARY": target_node,
5671 "NEW_PRIMARY": target_node,
5672 "NEW_SECONDARY": source_node,
5674 env.update(_BuildInstanceHookEnvByObject(self, instance))
5675     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5676     nl_post = list(nl)
5677     nl_post.append(source_node)
5678     return env, nl, nl_post
5680 def CheckPrereq(self):
5681 """Check prerequisites.
5683 This checks that the instance is in the cluster.
5686 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5687 assert self.instance is not None, \
5688 "Cannot retrieve locked instance %s" % self.op.instance_name
5690 bep = self.cfg.GetClusterInfo().FillBE(instance)
5691 if instance.disk_template not in constants.DTS_NET_MIRROR:
5692 raise errors.OpPrereqError("Instance's disk layout is not"
5693 " network mirrored, cannot failover.",
5696 secondary_nodes = instance.secondary_nodes
5697 if not secondary_nodes:
5698 raise errors.ProgrammerError("no secondary node but using "
5699 "a mirrored disk template")
5701 target_node = secondary_nodes[0]
5702 _CheckNodeOnline(self, target_node)
5703 _CheckNodeNotDrained(self, target_node)
5704 if instance.admin_up:
5705 # check memory requirements on the secondary node
5706 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5707 instance.name, bep[constants.BE_MEMORY],
5708 instance.hypervisor)
5710 self.LogInfo("Not checking memory on the secondary node as"
5711 " instance will not be started")
5713 # check bridge existence
5714 _CheckInstanceBridgesExist(self, instance, node=target_node)
5716 def Exec(self, feedback_fn):
5717 """Failover an instance.
5719 The failover is done by shutting it down on its present node and
5720 starting it on the secondary.
5723 instance = self.instance
5724 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5726 source_node = instance.primary_node
5727 target_node = instance.secondary_nodes[0]
5729 if instance.admin_up:
5730 feedback_fn("* checking disk consistency between source and target")
5731 for dev in instance.disks:
5732 # for drbd, these are drbd over lvm
5733 if not _CheckDiskConsistency(self, dev, target_node, False):
5734 if not self.op.ignore_consistency:
5735 raise errors.OpExecError("Disk %s is degraded on target node,"
5736 " aborting failover." % dev.iv_name)
5738 feedback_fn("* not checking disk consistency as instance is not running")
5740 feedback_fn("* shutting down instance on source node")
5741 logging.info("Shutting down instance %s on node %s",
5742 instance.name, source_node)
5744 result = self.rpc.call_instance_shutdown(source_node, instance,
5745 self.op.shutdown_timeout)
5746 msg = result.fail_msg
5747 if msg:
5748 if self.op.ignore_consistency or primary_node.offline:
5749 self.proc.LogWarning("Could not shut down instance %s on node %s."
5750 " Proceeding anyway. Please make sure node"
5751 " %s is down. Error details: %s",
5752 instance.name, source_node, source_node, msg)
5753 else:
5754 raise errors.OpExecError("Could not shut down instance %s on"
5755 " node %s: %s" %
5756 (instance.name, source_node, msg))
5758 feedback_fn("* deactivating the instance's disks on source node")
5759 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5760 raise errors.OpExecError("Can't shut down the instance's disks.")
5762 instance.primary_node = target_node
5763 # distribute new instance config to the other nodes
5764 self.cfg.Update(instance, feedback_fn)
5766 # Only start the instance if it's marked as up
5767 if instance.admin_up:
5768 feedback_fn("* activating the instance's disks on target node")
5769 logging.info("Starting instance %s on node %s",
5770 instance.name, target_node)
5772 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5773 ignore_secondaries=True)
5774 if not disks_ok:
5775 _ShutdownInstanceDisks(self, instance)
5776 raise errors.OpExecError("Can't activate the instance's disks")
5778 feedback_fn("* starting the instance on the target node")
5779 result = self.rpc.call_instance_start(target_node, instance, None, None)
5780 msg = result.fail_msg
5781 if msg:
5782 _ShutdownInstanceDisks(self, instance)
5783 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5784 (instance.name, target_node, msg))
5787 class LUMigrateInstance(LogicalUnit):
5788 """Migrate an instance.
5790 This is migration without shutting down, compared to the failover,
5791 which is done with shutdown.
5794 HPATH = "instance-migrate"
5795 HTYPE = constants.HTYPE_INSTANCE
5800 ("cleanup", False, ht.TBool),
5805 def ExpandNames(self):
5806 self._ExpandAndLockInstance()
5808 self.needed_locks[locking.LEVEL_NODE] = []
5809 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5811 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5812 self.op.cleanup)
5813 self.tasklets = [self._migrater]
5815 def DeclareLocks(self, level):
5816 if level == locking.LEVEL_NODE:
5817 self._LockInstancesNodes()
5819 def BuildHooksEnv(self):
5822 This runs on master, primary and secondary nodes of the instance.
5825 instance = self._migrater.instance
5826 source_node = instance.primary_node
5827 target_node = instance.secondary_nodes[0]
5828 env = _BuildInstanceHookEnvByObject(self, instance)
5829 env["MIGRATE_LIVE"] = self._migrater.live
5830 env["MIGRATE_CLEANUP"] = self.op.cleanup
5832 "OLD_PRIMARY": source_node,
5833 "OLD_SECONDARY": target_node,
5834 "NEW_PRIMARY": target_node,
5835 "NEW_SECONDARY": source_node,
5837 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5839 nl_post.append(source_node)
5840 return env, nl, nl_post
5843 class LUMoveInstance(LogicalUnit):
5844 """Move an instance by data-copying.
5847 HPATH = "instance-move"
5848 HTYPE = constants.HTYPE_INSTANCE
5851 ("target_node", ht.NoDefault, ht.TNonEmptyString),
5856 def ExpandNames(self):
5857 self._ExpandAndLockInstance()
5858 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5859 self.op.target_node = target_node
5860 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5861 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5863 def DeclareLocks(self, level):
5864 if level == locking.LEVEL_NODE:
5865 self._LockInstancesNodes(primary_only=True)
5867 def BuildHooksEnv(self):
5868 """Build hooks env.
5870 This runs on master, primary and secondary nodes of the instance.
5872 """
5873 env = {
5874 "TARGET_NODE": self.op.target_node,
5875 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5876 }
5877 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5878 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5879 self.op.target_node]
5880 return env, nl, nl
5882 def CheckPrereq(self):
5883 """Check prerequisites.
5885 This checks that the instance is in the cluster.
5888 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5889 assert self.instance is not None, \
5890 "Cannot retrieve locked instance %s" % self.op.instance_name
5892 node = self.cfg.GetNodeInfo(self.op.target_node)
5893 assert node is not None, \
5894 "Cannot retrieve locked node %s" % self.op.target_node
5896 self.target_node = target_node = node.name
5898 if target_node == instance.primary_node:
5899 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5900 (instance.name, target_node),
5901 errors.ECODE_STATE)
5903 bep = self.cfg.GetClusterInfo().FillBE(instance)
5905 for idx, dsk in enumerate(instance.disks):
5906 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5907 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5908 " cannot copy" % idx, errors.ECODE_STATE)
5910 _CheckNodeOnline(self, target_node)
5911 _CheckNodeNotDrained(self, target_node)
5912 _CheckNodeVmCapable(self, target_node)
5914 if instance.admin_up:
5915 # check memory requirements on the target node
5916 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5917 instance.name, bep[constants.BE_MEMORY],
5918 instance.hypervisor)
5919 else:
5920 self.LogInfo("Not checking memory on the target node as"
5921 " instance will not be started")
5923 # check bridge existence
5924 _CheckInstanceBridgesExist(self, instance, node=target_node)
5926 def Exec(self, feedback_fn):
5927 """Move an instance.
5929 The move is done by shutting it down on its present node, copying
5930 the data over (slow) and starting it on the new node.
5933 instance = self.instance
5935 source_node = instance.primary_node
5936 target_node = self.target_node
5938 self.LogInfo("Shutting down instance %s on source node %s",
5939 instance.name, source_node)
5941 result = self.rpc.call_instance_shutdown(source_node, instance,
5942 self.op.shutdown_timeout)
5943 msg = result.fail_msg
5944 if msg:
5945 if self.op.ignore_consistency:
5946 self.proc.LogWarning("Could not shut down instance %s on node %s."
5947 " Proceeding anyway. Please make sure node"
5948 " %s is down. Error details: %s",
5949 instance.name, source_node, source_node, msg)
5950 else:
5951 raise errors.OpExecError("Could not shut down instance %s on"
5952 " node %s: %s" %
5953 (instance.name, source_node, msg))
5955 # create the target disks
5956 try:
5957 _CreateDisks(self, instance, target_node=target_node)
5958 except errors.OpExecError:
5959 self.LogWarning("Device creation failed, reverting...")
5960 try:
5961 _RemoveDisks(self, instance, target_node=target_node)
5962 finally:
5963 self.cfg.ReleaseDRBDMinors(instance.name)
5964 raise
5966 cluster_name = self.cfg.GetClusterInfo().cluster_name
5968 errs = []
5969 # activate, get path, copy the data over
5970 for idx, disk in enumerate(instance.disks):
5971 self.LogInfo("Copying data for disk %d", idx)
5972 result = self.rpc.call_blockdev_assemble(target_node, disk,
5973 instance.name, True)
5975 self.LogWarning("Can't assemble newly created disk %d: %s",
5976 idx, result.fail_msg)
5977 errs.append(result.fail_msg)
5979 dev_path = result.payload
5980 result = self.rpc.call_blockdev_export(source_node, disk,
5981 target_node, dev_path,
5984 self.LogWarning("Can't copy data over for disk %d: %s",
5985 idx, result.fail_msg)
5986 errs.append(result.fail_msg)
5990 self.LogWarning("Some disks failed to copy, aborting")
5992 _RemoveDisks(self, instance, target_node=target_node)
5994 self.cfg.ReleaseDRBDMinors(instance.name)
5995 raise errors.OpExecError("Errors during disk copy: %s" %
5998 instance.primary_node = target_node
5999 self.cfg.Update(instance, feedback_fn)
6001 self.LogInfo("Removing the disks on the original node")
6002 _RemoveDisks(self, instance, target_node=source_node)
6004 # Only start the instance if it's marked as up
6005 if instance.admin_up:
6006 self.LogInfo("Starting instance %s on node %s",
6007 instance.name, target_node)
6009 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6010 ignore_secondaries=True)
6011 if not disks_ok:
6012 _ShutdownInstanceDisks(self, instance)
6013 raise errors.OpExecError("Can't activate the instance's disks")
6015 result = self.rpc.call_instance_start(target_node, instance, None, None)
6016 msg = result.fail_msg
6017 if msg:
6018 _ShutdownInstanceDisks(self, instance)
6019 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6020 (instance.name, target_node, msg))
6023 class LUMigrateNode(LogicalUnit):
6024 """Migrate all instances from a node.
6027 HPATH = "node-migrate"
6028 HTYPE = constants.HTYPE_NODE
6029 _OP_PARAMS = [
6030 _PNodeName,
6031 _PMigrationMode,
6032 _PMigrationLive,
6033 ]
6035 REQ_BGL = False
6036 def ExpandNames(self):
6037 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6039 self.needed_locks = {
6040 locking.LEVEL_NODE: [self.op.node_name],
6041 }
6043 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6045 # Create tasklets for migrating instances for all instances on this node
6047 names = []
6048 tasklets = []
6049 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6050 logging.debug("Migrating instance %s", inst.name)
6051 names.append(inst.name)
6053 tasklets.append(TLMigrateInstance(self, inst.name, False))
6055 self.tasklets = tasklets
6057 # Declare instance locks
6058 self.needed_locks[locking.LEVEL_INSTANCE] = names
6060 def DeclareLocks(self, level):
6061 if level == locking.LEVEL_NODE:
6062 self._LockInstancesNodes()
6064 def BuildHooksEnv(self):
6065 """Build hooks env.
6067 This runs on the master, the primary and all the secondaries.
6069 """
6070 env = {
6071 "NODE_NAME": self.op.node_name,
6072 }
6074 nl = [self.cfg.GetMasterNode()]
6076 return (env, nl, nl)
6079 class TLMigrateInstance(Tasklet):
6080 """Tasklet class for instance migration.
6083 @ivar live: whether the migration will be done live or non-live;
6084 this variable is initialized only after CheckPrereq has run
6086 """
6087 def __init__(self, lu, instance_name, cleanup):
6088 """Initializes this class.
6091 Tasklet.__init__(self, lu)
6094 self.instance_name = instance_name
6095 self.cleanup = cleanup
6096 self.live = False # will be overridden later
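# Usage sketch (editorial): this tasklet is instantiated by its owning LU --
# TLMigrateInstance(self, self.op.instance_name, self.op.cleanup) in
# LUMigrateInstance, or TLMigrateInstance(self, inst.name, False) once per
# primary instance in LUMigrateNode -- and then driven through CheckPrereq
# and Exec like any other tasklet.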
6098 def CheckPrereq(self):
6099 """Check prerequisites.
6101 This checks that the instance is in the cluster.
6104 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6105 instance = self.cfg.GetInstanceInfo(instance_name)
6106 assert instance is not None
6108 if instance.disk_template != constants.DT_DRBD8:
6109 raise errors.OpPrereqError("Instance's disk layout is not"
6110 " drbd8, cannot migrate.", errors.ECODE_STATE)
6112 secondary_nodes = instance.secondary_nodes
6113 if not secondary_nodes:
6114 raise errors.ConfigurationError("No secondary node but using"
6115 " drbd8 disk template")
6117 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6119 target_node = secondary_nodes[0]
6120 # check memory requirements on the secondary node
6121 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6122 instance.name, i_be[constants.BE_MEMORY],
6123 instance.hypervisor)
6125 # check bridge existance
6126 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6128 if not self.cleanup:
6129 _CheckNodeNotDrained(self.lu, target_node)
6130 result = self.rpc.call_instance_migratable(instance.primary_node,
6132 result.Raise("Can't migrate, please use failover",
6133 prereq=True, ecode=errors.ECODE_STATE)
6135 self.instance = instance
6137 if self.lu.op.live is not None and self.lu.op.mode is not None:
6138 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6139 " parameters are accepted",
6141 if self.lu.op.live is not None:
6143 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6145 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6146 # reset the 'live' parameter to None so that repeated
6147 # invocations of CheckPrereq do not raise an exception
6148 self.lu.op.live = None
6149 elif self.lu.op.mode is None:
6150 # read the default value from the hypervisor
6151 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6152 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6154 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
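# Resolution examples (editorial): live=True maps to HT_MIGRATION_LIVE,
# live=False to HT_MIGRATION_NONLIVE; with both live and mode unset, the
# hypervisor's HV_MIGRATION_MODE default wins; passing live together with
# mode is rejected in the check above.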
6156 def _WaitUntilSync(self):
6157 """Poll with custom rpc for disk sync.
6159 This uses our own step-based rpc call.
6162 self.feedback_fn("* wait until resync is done")
6166 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6168 self.instance.disks)
6170 for node, nres in result.items():
6171 nres.Raise("Cannot resync disks on node %s" % node)
6172 node_done, node_percent = nres.payload
6173 all_done = all_done and node_done
6174 if node_percent is not None:
6175 min_percent = min(min_percent, node_percent)
6177 if min_percent < 100:
6178 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6181 def _EnsureSecondary(self, node):
6182 """Demote a node to secondary.
6185 self.feedback_fn("* switching node %s to secondary mode" % node)
6187 for dev in self.instance.disks:
6188 self.cfg.SetDiskID(dev, node)
6190 result = self.rpc.call_blockdev_close(node, self.instance.name,
6191 self.instance.disks)
6192 result.Raise("Cannot change disk to secondary on node %s" % node)
6194 def _GoStandalone(self):
6195 """Disconnect from the network.
6198 self.feedback_fn("* changing into standalone mode")
6199 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6200 self.instance.disks)
6201 for node, nres in result.items():
6202 nres.Raise("Cannot disconnect disks node %s" % node)
6204 def _GoReconnect(self, multimaster):
6205 """Reconnect to the network.
6211 msg = "single-master"
6212 self.feedback_fn("* changing disks into %s mode" % msg)
6213 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6214 self.instance.disks,
6215 self.instance.name, multimaster)
6216 for node, nres in result.items():
6217 nres.Raise("Cannot change disks config on node %s" % node)
6219 def _ExecCleanup(self):
6220 """Try to cleanup after a failed migration.
6222 The cleanup is done by:
6223 - check that the instance is running only on one node
6224 (and update the config if needed)
6225 - change disks on its secondary node to secondary
6226 - wait until disks are fully synchronized
6227 - disconnect from the network
6228 - change disks into single-master mode
6229 - wait again until disks are fully synchronized
6232 instance = self.instance
6233 target_node = self.target_node
6234 source_node = self.source_node
6236 # check running on only one node
6237 self.feedback_fn("* checking where the instance actually runs"
6238 " (if this hangs, the hypervisor might be in"
6240 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6241 for node, result in ins_l.items():
6242 result.Raise("Can't contact node %s" % node)
6244 runningon_source = instance.name in ins_l[source_node].payload
6245 runningon_target = instance.name in ins_l[target_node].payload
6247 if runningon_source and runningon_target:
6248 raise errors.OpExecError("Instance seems to be running on two nodes,"
6249 " or the hypervisor is confused. You will have"
6250 " to ensure manually that it runs only on one"
6251 " and restart this operation.")
6253 if not (runningon_source or runningon_target):
6254 raise errors.OpExecError("Instance does not seem to be running at all."
6255 " In this case, it's safer to repair by"
6256 " running 'gnt-instance stop' to ensure disk"
6257 " shutdown, and then restarting it.")
6259 if runningon_target:
6260 # the migration has actually succeeded, we need to update the config
6261 self.feedback_fn("* instance running on secondary node (%s),"
6262 " updating config" % target_node)
6263 instance.primary_node = target_node
6264 self.cfg.Update(instance, self.feedback_fn)
6265 demoted_node = source_node
6267 self.feedback_fn("* instance confirmed to be running on its"
6268 " primary node (%s)" % source_node)
6269 demoted_node = target_node
6271 self._EnsureSecondary(demoted_node)
6272 try:
6273 self._WaitUntilSync()
6274 except errors.OpExecError:
6275 # we ignore errors here, since if the device is standalone, it
6276 # won't be able to sync
6277 pass
6278 self._GoStandalone()
6279 self._GoReconnect(False)
6280 self._WaitUntilSync()
6282 self.feedback_fn("* done")
6284 def _RevertDiskStatus(self):
6285 """Try to revert the disk status after a failed migration.
6287 """
6288 target_node = self.target_node
6289 try:
6290 self._EnsureSecondary(target_node)
6291 self._GoStandalone()
6292 self._GoReconnect(False)
6293 self._WaitUntilSync()
6294 except errors.OpExecError, err:
6295 self.lu.LogWarning("Migration failed and I can't reconnect the"
6296 " drives: error '%s'\n"
6297 "Please look and recover the instance status" %
6300 def _AbortMigration(self):
6301 """Call the hypervisor code to abort a started migration.
6304 instance = self.instance
6305 target_node = self.target_node
6306 migration_info = self.migration_info
6308 abort_result = self.rpc.call_finalize_migration(target_node,
6309 instance,
6310 migration_info,
6311 False)
6312 abort_msg = abort_result.fail_msg
6313 if abort_msg:
6314 logging.error("Aborting migration failed on target node %s: %s",
6315 target_node, abort_msg)
6316 # Don't raise an exception here, as we still have to try to revert the
6317 # disk status, even if this step failed.
6319 def _ExecMigration(self):
6320 """Migrate an instance.
6322 The migrate is done by:
6323 - change the disks into dual-master mode
6324 - wait until disks are fully synchronized again
6325 - migrate the instance
6326 - change disks on the new secondary node (the old primary) to secondary
6327 - wait until disks are fully synchronized
6328 - change disks into single-master mode
6331 instance = self.instance
6332 target_node = self.target_node
6333 source_node = self.source_node
6335 self.feedback_fn("* checking disk consistency between source and target")
6336 for dev in instance.disks:
6337 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6338 raise errors.OpExecError("Disk %s is degraded or not fully"
6339 " synchronized on target node,"
6340 " aborting migrate." % dev.iv_name)
6342 # First get the migration information from the remote node
6343 result = self.rpc.call_migration_info(source_node, instance)
6344 msg = result.fail_msg
6346 log_err = ("Failed fetching source migration information from %s: %s" %
6348 logging.error(log_err)
6349 raise errors.OpExecError(log_err)
6351 self.migration_info = migration_info = result.payload
6353 # Then switch the disks to master/master mode
6354 self._EnsureSecondary(target_node)
6355 self._GoStandalone()
6356 self._GoReconnect(True)
6357 self._WaitUntilSync()
6359 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6360 result = self.rpc.call_accept_instance(target_node,
6361 instance,
6362 migration_info,
6363 self.nodes_ip[target_node])
6365 msg = result.fail_msg
6366 if msg:
6367 logging.error("Instance pre-migration failed, trying to revert"
6368 " disk status: %s", msg)
6369 self.feedback_fn("Pre-migration failed, aborting")
6370 self._AbortMigration()
6371 self._RevertDiskStatus()
6372 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6373 (instance.name, msg))
6375 self.feedback_fn("* migrating instance to %s" % target_node)
6377 result = self.rpc.call_instance_migrate(source_node, instance,
6378 self.nodes_ip[target_node],
6379 self.live)
6380 msg = result.fail_msg
6381 if msg:
6382 logging.error("Instance migration failed, trying to revert"
6383 " disk status: %s", msg)
6384 self.feedback_fn("Migration failed, aborting")
6385 self._AbortMigration()
6386 self._RevertDiskStatus()
6387 raise errors.OpExecError("Could not migrate instance %s: %s" %
6388 (instance.name, msg))
6391 instance.primary_node = target_node
6392 # distribute new instance config to the other nodes
6393 self.cfg.Update(instance, self.feedback_fn)
6395 result = self.rpc.call_finalize_migration(target_node,
6396 instance,
6397 migration_info,
6398 True)
6399 msg = result.fail_msg
6400 if msg:
6401 logging.error("Instance migration succeeded, but finalization failed:"
6402 " %s", msg)
6403 raise errors.OpExecError("Could not finalize instance migration: %s" %
6404 msg)
6406 self._EnsureSecondary(source_node)
6407 self._WaitUntilSync()
6408 self._GoStandalone()
6409 self._GoReconnect(False)
6410 self._WaitUntilSync()
6412 self.feedback_fn("* done")
6414 def Exec(self, feedback_fn):
6415 """Perform the migration.
6418 feedback_fn("Migrating instance %s" % self.instance.name)
6420 self.feedback_fn = feedback_fn
6422 self.source_node = self.instance.primary_node
6423 self.target_node = self.instance.secondary_nodes[0]
6424 self.all_nodes = [self.source_node, self.target_node]
6425 self.nodes_ip = {
6426 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6427 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6428 }
6430 if self.cleanup:
6431 return self._ExecCleanup()
6432 else:
6433 return self._ExecMigration()
6436 def _CreateBlockDev(lu, node, instance, device, force_create,
6437 info, force_open):
6438 """Create a tree of block devices on a given node.
6440 If this device type has to be created on secondaries, create it and
6441 all its children.
6443 If not, just recurse to children keeping the same 'force' value.
6445 @param lu: the lu on whose behalf we execute
6446 @param node: the node on which to create the device
6447 @type instance: L{objects.Instance}
6448 @param instance: the instance which owns the device
6449 @type device: L{objects.Disk}
6450 @param device: the device to create
6451 @type force_create: boolean
6452 @param force_create: whether to force creation of this device; this
6453 will be changed to True whenever we find a device whose
6454 CreateOnSecondary() returns True
6455 @param info: the extra 'metadata' we should attach to the device
6456 (this will be represented as a LVM tag)
6457 @type force_open: boolean
6458 @param force_open: this parameter will be passed to the
6459 L{backend.BlockdevCreate} function where it specifies
6460 whether we run on primary or not, and it affects both
6461 the child assembly and the device's own Open() execution
6463 """
6464 if device.CreateOnSecondary():
6465 force_create = True
6467 if device.children:
6468 for child in device.children:
6469 _CreateBlockDev(lu, node, instance, child, force_create,
6470 info, force_open)
6472 if not force_create:
6473 return
6475 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
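# Editorial sketch of the recursion: once a device reports
# CreateOnSecondary(), force_create is switched on for its whole subtree, so
# e.g. the data/meta LV children of a DRBD8 disk are materialized even on
# nodes where the caller did not force creation; a device itself reaches
# _CreateSingleBlockDev only when force_create ended up True.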
6478 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6479 """Create a single block device on a given node.
6481 This will not recurse over children of the device, so they must be
6482 created in advance.
6484 @param lu: the lu on whose behalf we execute
6485 @param node: the node on which to create the device
6486 @type instance: L{objects.Instance}
6487 @param instance: the instance which owns the device
6488 @type device: L{objects.Disk}
6489 @param device: the device to create
6490 @param info: the extra 'metadata' we should attach to the device
6491 (this will be represented as a LVM tag)
6492 @type force_open: boolean
6493 @param force_open: this parameter will be passed to the
6494 L{backend.BlockdevCreate} function where it specifies
6495 whether we run on primary or not, and it affects both
6496 the child assembly and the device's own Open() execution
6498 """
6499 lu.cfg.SetDiskID(device, node)
6500 result = lu.rpc.call_blockdev_create(node, device, device.size,
6501 instance.name, force_open, info)
6502 result.Raise("Can't create block device %s on"
6503 " node %s for instance %s" % (device, node, instance.name))
6504 if device.physical_id is None:
6505 device.physical_id = result.payload
6508 def _GenerateUniqueNames(lu, exts):
6509 """Generate a suitable LV name.
6511 This will generate a logical volume name for the given instance.
6513 """
6514 results = []
6515 for val in exts:
6516 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6517 results.append("%s%s" % (new_id, val))
6518 return results
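# Example (editorial, illustrative value): _GenerateUniqueNames(lu, [".disk0"])
# returns something like ["<unique-id>.disk0"], one cluster-unique name per
# extension; the DRBD path in _GenerateDiskTemplate below derives
# "<name>_data"/"<name>_meta" pairs from these.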
6521 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6522 p_minor, s_minor):
6523 """Generate a drbd8 device complete with its children.
6525 """
6526 port = lu.cfg.AllocatePort()
6527 vgname = lu.cfg.GetVGName()
6528 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6529 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6530 logical_id=(vgname, names[0]))
6531 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6532 logical_id=(vgname, names[1]))
6533 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6534 logical_id=(primary, secondary, port,
6535 p_minor, s_minor,
6536 shared_secret),
6537 children=[dev_data, dev_meta],
6538 iv_name=iv_name)
6539 return drbd_dev
6542 def _GenerateDiskTemplate(lu, template_name,
6543 instance_name, primary_node,
6544 secondary_nodes, disk_info,
6545 file_storage_dir, file_driver,
6546 base_index):
6547 """Generate the entire disk layout for a given template type.
6550 #TODO: compute space requirements
6552 vgname = lu.cfg.GetVGName()
6553 disk_count = len(disk_info)
6554 disks = []
6555 if template_name == constants.DT_DISKLESS:
6556 pass
6557 elif template_name == constants.DT_PLAIN:
6558 if len(secondary_nodes) != 0:
6559 raise errors.ProgrammerError("Wrong template configuration")
6561 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6562 for i in range(disk_count)])
6563 for idx, disk in enumerate(disk_info):
6564 disk_index = idx + base_index
6565 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6566 logical_id=(vgname, names[idx]),
6567 iv_name="disk/%d" % disk_index,
6569 disks.append(disk_dev)
6570 elif template_name == constants.DT_DRBD8:
6571 if len(secondary_nodes) != 1:
6572 raise errors.ProgrammerError("Wrong template configuration")
6573 remote_node = secondary_nodes[0]
6574 minors = lu.cfg.AllocateDRBDMinor(
6575 [primary_node, remote_node] * len(disk_info), instance_name)
6577 names = []
6578 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6579 for i in range(disk_count)]):
6580 names.append(lv_prefix + "_data")
6581 names.append(lv_prefix + "_meta")
6582 for idx, disk in enumerate(disk_info):
6583 disk_index = idx + base_index
6584 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6585 disk["size"], names[idx*2:idx*2+2],
6586 "disk/%d" % disk_index,
6587 minors[idx*2], minors[idx*2+1])
6588 disk_dev.mode = disk["mode"]
6589 disks.append(disk_dev)
6590 elif template_name == constants.DT_FILE:
6591 if len(secondary_nodes) != 0:
6592 raise errors.ProgrammerError("Wrong template configuration")
6594 _RequireFileStorage()
6596 for idx, disk in enumerate(disk_info):
6597 disk_index = idx + base_index
6598 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6599 iv_name="disk/%d" % disk_index,
6600 logical_id=(file_driver,
6601 "%s/disk%d" % (file_storage_dir,
6604 disks.append(disk_dev)
6606 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6610 def _GetInstanceInfoText(instance):
6611 """Compute that text that should be added to the disk's metadata.
6614 return "originstname+%s" % instance.name
6617 def _CalcEta(time_taken, written, total_size):
6618 """Calculates the ETA based on size written and total size.
6620 @param time_taken: The time taken so far
6621 @param written: amount written so far
6622 @param total_size: The total size of data to be written
6623 @return: The remaining time in seconds
6625 """
6626 avg_time = time_taken / float(written)
6627 return (total_size - written) * avg_time
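# Worked example (editorial): _CalcEta(30, 512, 2048) computes
# avg_time = 30 / 512.0 seconds per unit written, so the remaining
# 2048 - 512 = 1536 units give an ETA of exactly 90.0 seconds.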
6630 def _WipeDisks(lu, instance):
6631 """Wipes instance disks.
6633 @type lu: L{LogicalUnit}
6634 @param lu: the logical unit on whose behalf we execute
6635 @type instance: L{objects.Instance}
6636 @param instance: the instance whose disks we should wipe
6637 @return: the success of the wipe
6639 """
6640 node = instance.primary_node
6641 for idx, device in enumerate(instance.disks):
6642 lu.LogInfo("* Wiping disk %d", idx)
6643 logging.info("Wiping disk %d for instance %s", idx, instance.name)
6645 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6646 # MAX_WIPE_CHUNK at max
6647 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6648 constants.MIN_WIPE_CHUNK_PERCENT)
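# Sizing example (editorial, assuming illustrative constants
# MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024): a 2048 MiB disk is
# wiped in chunks of min(1024, 2048 / 100.0 * 10) = 204.8 MiB, so larger
# disks are capped at MAX_WIPE_CHUNK per RPC call.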
6650 offset = 0
6651 size = device.size
6652 last_output = 0
6653 start_time = time.time()
6655 while offset < size:
6656 wipe_size = min(wipe_chunk_size, size - offset)
6657 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6658 result.Raise("Could not wipe disk %d at offset %d for size %d" %
6659 (idx, offset, wipe_size))
6660 offset += wipe_size
6661 now = time.time()
6662 if now - last_output >= 60:
6663 eta = _CalcEta(now - start_time, offset, size)
6664 lu.LogInfo(" - done: %.1f%% ETA: %s" %
6665 (offset / float(size) * 100, utils.FormatSeconds(eta)))
6666 last_output = now
6669 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6670 """Create all disks for an instance.
6672 This abstracts away some work from AddInstance.
6674 @type lu: L{LogicalUnit}
6675 @param lu: the logical unit on whose behalf we execute
6676 @type instance: L{objects.Instance}
6677 @param instance: the instance whose disks we should create
6679 @param to_skip: list of indices to skip
6680 @type target_node: string
6681 @param target_node: if passed, overrides the target node for creation
6683 @return: the success of the creation
6686 info = _GetInstanceInfoText(instance)
6687 if target_node is None:
6688 pnode = instance.primary_node
6689 all_nodes = instance.all_nodes
6690 else:
6691 pnode = target_node
6692 all_nodes = [pnode]
6694 if instance.disk_template == constants.DT_FILE:
6695 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6696 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6698 result.Raise("Failed to create directory '%s' on"
6699 " node %s" % (file_storage_dir, pnode))
6701 # Note: this needs to be kept in sync with adding of disks in
6702 # LUSetInstanceParams
6703 for idx, device in enumerate(instance.disks):
6704 if to_skip and idx in to_skip:
6705 continue
6706 logging.info("Creating volume %s for instance %s",
6707 device.iv_name, instance.name)
6709 for node in all_nodes:
6710 f_create = node == pnode
6711 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6714 def _RemoveDisks(lu, instance, target_node=None):
6715 """Remove all disks for an instance.
6717 This abstracts away some work from `AddInstance()` and
6718 `RemoveInstance()`. Note that in case some of the devices couldn't
6719 be removed, the removal will continue with the other ones (compare
6720 with `_CreateDisks()`).
6722 @type lu: L{LogicalUnit}
6723 @param lu: the logical unit on whose behalf we execute
6724 @type instance: L{objects.Instance}
6725 @param instance: the instance whose disks we should remove
6726 @type target_node: string
6727 @param target_node: used to override the node on which to remove the disks
6729 @return: the success of the removal
6732 logging.info("Removing block devices for instance %s", instance.name)
6734 all_result = True
6735 for device in instance.disks:
6736 if target_node:
6737 edata = [(target_node, device)]
6738 else:
6739 edata = device.ComputeNodeTree(instance.primary_node)
6740 for node, disk in edata:
6741 lu.cfg.SetDiskID(disk, node)
6742 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6744 lu.LogWarning("Could not remove block device %s on node %s,"
6745 " continuing anyway: %s", device.iv_name, node, msg)
6748 if instance.disk_template == constants.DT_FILE:
6749 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6753 tgt = instance.primary_node
6754 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6756 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6757 file_storage_dir, instance.primary_node, result.fail_msg)
6763 def _ComputeDiskSize(disk_template, disks):
6764 """Compute disk size requirements in the volume group
6766 """
6767 # Required free disk space as a function of disk and swap space
6768 req_size_dict = {
6769 constants.DT_DISKLESS: None,
6770 constants.DT_PLAIN: sum(d["size"] for d in disks),
6771 # 128 MB are added for drbd metadata for each disk
6772 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6773 constants.DT_FILE: None,
6774 }
6776 if disk_template not in req_size_dict:
6777 raise errors.ProgrammerError("Disk template '%s' size requirement"
6778 " is unknown" % disk_template)
6780 return req_size_dict[disk_template]
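# Worked example (editorial): two disks of 1024 and 256 MiB under DT_DRBD8
# need (1024 + 128) + (256 + 128) = 1536 MiB of free space in the volume
# group, while DT_DISKLESS and DT_FILE place no requirement on it (None).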
6783 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6784 """Hypervisor parameter validation.
6786 This function abstracts the hypervisor parameter validation to be
6787 used in both instance create and instance modify.
6789 @type lu: L{LogicalUnit}
6790 @param lu: the logical unit for which we check
6791 @type nodenames: list
6792 @param nodenames: the list of nodes on which we should check
6793 @type hvname: string
6794 @param hvname: the name of the hypervisor we should use
6795 @type hvparams: dict
6796 @param hvparams: the parameters which we need to check
6797 @raise errors.OpPrereqError: if the parameters are not valid
6799 """
6800 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6801 hvname,
6802 hvparams)
6803 for node in nodenames:
6804 info = hvinfo[node]
6805 if info.offline:
6806 continue
6807 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6810 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6811 """OS parameters validation.
6813 @type lu: L{LogicalUnit}
6814 @param lu: the logical unit for which we check
6815 @type required: boolean
6816 @param required: whether the validation should fail if the OS is not
6817 found
6818 @type nodenames: list
6819 @param nodenames: the list of nodes on which we should check
6820 @type osname: string
6821 @param osname: the name of the OS we should use
6822 @type osparams: dict
6823 @param osparams: the parameters which we need to check
6824 @raise errors.OpPrereqError: if the parameters are not valid
6826 """
6827 result = lu.rpc.call_os_validate(required, nodenames, osname,
6828 [constants.OS_VALIDATE_PARAMETERS],
6829 osparams)
6830 for node, nres in result.items():
6831 # we don't check for offline cases since this should be run only
6832 # against the master node and/or an instance's nodes
6833 nres.Raise("OS Parameters validation failed on node %s" % node)
6834 if not nres.payload:
6835 lu.LogInfo("OS %s not found on node %s, validation skipped",
6839 class LUCreateInstance(LogicalUnit):
6840 """Create an instance.
6843 HPATH = "instance-add"
6844 HTYPE = constants.HTYPE_INSTANCE
6847 ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6848 ("start", True, ht.TBool),
6849 ("wait_for_sync", True, ht.TBool),
6850 ("ip_check", True, ht.TBool),
6851 ("name_check", True, ht.TBool),
6852 ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6853 ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6854 ("hvparams", ht.EmptyDict, ht.TDict),
6855 ("beparams", ht.EmptyDict, ht.TDict),
6856 ("osparams", ht.EmptyDict, ht.TDict),
6857 ("no_install", None, ht.TMaybeBool),
6858 ("os_type", None, ht.TMaybeString),
6859 ("force_variant", False, ht.TBool),
6860 ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6861 ("source_x509_ca", None, ht.TMaybeString),
6862 ("source_instance_name", None, ht.TMaybeString),
6863 ("src_node", None, ht.TMaybeString),
6864 ("src_path", None, ht.TMaybeString),
6865 ("pnode", None, ht.TMaybeString),
6866 ("snode", None, ht.TMaybeString),
6867 ("iallocator", None, ht.TMaybeString),
6868 ("hypervisor", None, ht.TMaybeString),
6869 ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6870 ("identify_defaults", False, ht.TBool),
6871 ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6872 ("file_storage_dir", None, ht.TMaybeString),
6876 def CheckArguments(self):
6880 # do not require name_check to ease forward/backward compatibility
6882 if self.op.no_install and self.op.start:
6883 self.LogInfo("No-installation mode selected, disabling startup")
6884 self.op.start = False
6885 # validate/normalize the instance name
6886 self.op.instance_name = \
6887 netutils.Hostname.GetNormalizedName(self.op.instance_name)
6889 if self.op.ip_check and not self.op.name_check:
6890 # TODO: make the ip check more flexible and not depend on the name check
6891 raise errors.OpPrereqError("Cannot do ip check without a name check",
6894 # check nics' parameter names
6895 for nic in self.op.nics:
6896 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6898 # check disks. parameter names and consistent adopt/no-adopt strategy
6899 has_adopt = has_no_adopt = False
6900 for disk in self.op.disks:
6901 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6902 if "adopt" in disk:
6903 has_adopt = True
6904 else:
6905 has_no_adopt = True
6906 if has_adopt and has_no_adopt:
6907 raise errors.OpPrereqError("Either all disks are adopted or none is",
6908 errors.ECODE_INVAL)
6909 if has_adopt:
6910 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6911 raise errors.OpPrereqError("Disk adoption is not supported for the"
6912 " '%s' disk template" %
6913 self.op.disk_template,
6914 errors.ECODE_INVAL)
6915 if self.op.iallocator is not None:
6916 raise errors.OpPrereqError("Disk adoption not allowed with an"
6917 " iallocator script", errors.ECODE_INVAL)
6918 if self.op.mode == constants.INSTANCE_IMPORT:
6919 raise errors.OpPrereqError("Disk adoption not allowed for"
6920 " instance import", errors.ECODE_INVAL)
6922 self.adopt_disks = has_adopt
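# Example opcode payloads (editorial): a regular creation passes disks such
# as [{"size": 1024}], whereas adoption passes [{"adopt": "existing-lv-name"}];
# mixing both forms in one request is rejected by the check above.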
6924 # instance name verification
6925 if self.op.name_check:
6926 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6927 self.op.instance_name = self.hostname1.name
6928 # used in CheckPrereq for ip ping check
6929 self.check_ip = self.hostname1.ip
6931 self.check_ip = None
6933 # file storage checks
6934 if (self.op.file_driver and
6935 self.op.file_driver not in constants.FILE_DRIVER):
6936 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6937 self.op.file_driver, errors.ECODE_INVAL)
6939 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6940 raise errors.OpPrereqError("File storage directory path not absolute",
6943 ### Node/iallocator related checks
6944 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6946 if self.op.pnode is not None:
6947 if self.op.disk_template in constants.DTS_NET_MIRROR:
6948 if self.op.snode is None:
6949 raise errors.OpPrereqError("The networked disk templates need"
6950 " a mirror node", errors.ECODE_INVAL)
6952 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6954 self.op.snode = None
6956 self._cds = _GetClusterDomainSecret()
6958 if self.op.mode == constants.INSTANCE_IMPORT:
6959 # On import force_variant must be True, because if we forced it at
6960 # initial install, our only chance when importing it back is that it
6961 # works again!
6962 self.op.force_variant = True
6964 if self.op.no_install:
6965 self.LogInfo("No-installation mode has no effect during import")
6967 elif self.op.mode == constants.INSTANCE_CREATE:
6968 if self.op.os_type is None:
6969 raise errors.OpPrereqError("No guest OS specified",
6971 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6972 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6973 " installation" % self.op.os_type,
6975 if self.op.disk_template is None:
6976 raise errors.OpPrereqError("No disk template specified",
6979 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6980 # Check handshake to ensure both clusters have the same domain secret
6981 src_handshake = self.op.source_handshake
6982 if not src_handshake:
6983 raise errors.OpPrereqError("Missing source handshake",
6986 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6989 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6992 # Load and check source CA
6993 self.source_x509_ca_pem = self.op.source_x509_ca
6994 if not self.source_x509_ca_pem:
6995 raise errors.OpPrereqError("Missing source X509 CA",
6999 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7001 except OpenSSL.crypto.Error, err:
7002 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7003 (err, ), errors.ECODE_INVAL)
7005 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7006 if errcode is not None:
7007 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7010 self.source_x509_ca = cert
7012 src_instance_name = self.op.source_instance_name
7013 if not src_instance_name:
7014 raise errors.OpPrereqError("Missing source instance name",
7017 self.source_instance_name = \
7018 netutils.GetHostname(name=src_instance_name).name
7021 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7022 self.op.mode, errors.ECODE_INVAL)
7024 def ExpandNames(self):
7025 """ExpandNames for CreateInstance.
7027 Figure out the right locks for instance creation.
7030 self.needed_locks = {}
7032 instance_name = self.op.instance_name
7033 # this is just a preventive check, but someone might still add this
7034 # instance in the meantime, and creation will fail at lock-add time
7035 if instance_name in self.cfg.GetInstanceList():
7036 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7037 instance_name, errors.ECODE_EXISTS)
7039 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7041 if self.op.iallocator:
7042 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7043 else:
7044 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7045 nodelist = [self.op.pnode]
7046 if self.op.snode is not None:
7047 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7048 nodelist.append(self.op.snode)
7049 self.needed_locks[locking.LEVEL_NODE] = nodelist
7051 # in case of import lock the source node too
7052 if self.op.mode == constants.INSTANCE_IMPORT:
7053 src_node = self.op.src_node
7054 src_path = self.op.src_path
7056 if src_path is None:
7057 self.op.src_path = src_path = self.op.instance_name
7059 if src_node is None:
7060 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7061 self.op.src_node = None
7062 if os.path.isabs(src_path):
7063 raise errors.OpPrereqError("Importing an instance from an absolute"
7064 " path requires a source node option.",
7067 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7068 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7069 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7070 if not os.path.isabs(src_path):
7071 self.op.src_path = src_path = \
7072 utils.PathJoin(constants.EXPORT_DIR, src_path)
7074 def _RunAllocator(self):
7075 """Run the allocator based on input opcode.
7078 nics = [n.ToDict() for n in self.nics]
7079 ial = IAllocator(self.cfg, self.rpc,
7080 mode=constants.IALLOCATOR_MODE_ALLOC,
7081 name=self.op.instance_name,
7082 disk_template=self.op.disk_template,
7083 tags=[],
7084 os=self.op.os_type,
7085 vcpus=self.be_full[constants.BE_VCPUS],
7086 mem_size=self.be_full[constants.BE_MEMORY],
7087 disks=self.disks,
7088 nics=nics,
7089 hypervisor=self.op.hypervisor,
7090 )
7092 ial.Run(self.op.iallocator)
7094 if not ial.success:
7095 raise errors.OpPrereqError("Can't compute nodes using"
7096 " iallocator '%s': %s" %
7097 (self.op.iallocator, ial.info),
7098 errors.ECODE_NORES)
7099 if len(ial.result) != ial.required_nodes:
7100 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7101 " of nodes (%s), required %s" %
7102 (self.op.iallocator, len(ial.result),
7103 ial.required_nodes), errors.ECODE_FAULT)
7104 self.op.pnode = ial.result[0]
7105 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7106 self.op.instance_name, self.op.iallocator,
7107 utils.CommaJoin(ial.result))
7108 if ial.required_nodes == 2:
7109 self.op.snode = ial.result[1]
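# Editorial note: for mirrored disk templates the allocator must return two
# node names; result[0] becomes the primary and result[1] the secondary, as
# validated against ial.required_nodes above. Non-mirrored templates consume
# only result[0].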
7111 def BuildHooksEnv(self):
7112 """Build hooks env.
7114 This runs on master, primary and secondary nodes of the instance.
7116 """
7117 env = {
7118 "ADD_MODE": self.op.mode,
7119 }
7120 if self.op.mode == constants.INSTANCE_IMPORT:
7121 env["SRC_NODE"] = self.op.src_node
7122 env["SRC_PATH"] = self.op.src_path
7123 env["SRC_IMAGES"] = self.src_images
7125 env.update(_BuildInstanceHookEnv(
7126 name=self.op.instance_name,
7127 primary_node=self.op.pnode,
7128 secondary_nodes=self.secondaries,
7129 status=self.op.start,
7130 os_type=self.op.os_type,
7131 memory=self.be_full[constants.BE_MEMORY],
7132 vcpus=self.be_full[constants.BE_VCPUS],
7133 nics=_NICListToTuple(self, self.nics),
7134 disk_template=self.op.disk_template,
7135 disks=[(d["size"], d["mode"]) for d in self.disks],
7136 bep=self.be_full,
7137 hvp=self.hv_full,
7138 hypervisor_name=self.op.hypervisor,
7139 ))
7141 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7142 self.secondaries)
7144 return env, nl, nl
7145 def _ReadExportInfo(self):
7146 """Reads the export information from disk.
7148 It will override the opcode source node and path with the actual
7149 information, if these two were not specified before.
7151 @return: the export information
7154 assert self.op.mode == constants.INSTANCE_IMPORT
7156 src_node = self.op.src_node
7157 src_path = self.op.src_path
7159 if src_node is None:
7160 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7161 exp_list = self.rpc.call_export_list(locked_nodes)
7162 found = False
7163 for node in exp_list:
7164 if exp_list[node].fail_msg:
7165 continue
7166 if src_path in exp_list[node].payload:
7167 found = True
7168 self.op.src_node = src_node = node
7169 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7170 src_path)
7171 break
7172 if not found:
7173 raise errors.OpPrereqError("No export found for relative path %s" %
7174 src_path, errors.ECODE_INVAL)
7176 _CheckNodeOnline(self, src_node)
7177 result = self.rpc.call_export_info(src_node, src_path)
7178 result.Raise("No export or invalid export found in dir %s" % src_path)
7180 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7181 if not export_info.has_section(constants.INISECT_EXP):
7182 raise errors.ProgrammerError("Corrupted export config",
7183 errors.ECODE_ENVIRON)
7185 ei_version = export_info.get(constants.INISECT_EXP, "version")
7186 if (int(ei_version) != constants.EXPORT_VERSION):
7187 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7188 (ei_version, constants.EXPORT_VERSION),
7189 errors.ECODE_ENVIRON)
7191 return export_info
7192 def _ReadExportParams(self, einfo):
7193 """Use export parameters as defaults.
7195 In case the opcode doesn't specify (as in override) some instance
7196 parameters, then try to use them from the export information, if
7197 that declares them.
7199 """
7200 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7202 if self.op.disk_template is None:
7203 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7204 self.op.disk_template = einfo.get(constants.INISECT_INS,
7205 "disk_template")
7206 else:
7207 raise errors.OpPrereqError("No disk template specified and the export"
7208 " is missing the disk_template information",
7209 errors.ECODE_INVAL)
7211 if not self.op.disks:
7212 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7213 disks = []
7214 # TODO: import the disk iv_name too
7215 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7216 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7217 disks.append({"size": disk_sz})
7218 self.op.disks = disks
7219 else:
7220 raise errors.OpPrereqError("No disk info specified and the export"
7221 " is missing the disk information",
7222 errors.ECODE_INVAL)
7224 if (not self.op.nics and
7225 einfo.has_option(constants.INISECT_INS, "nic_count")):
7226 nics = []
7227 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7228 ndict = {}
7229 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7230 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7231 ndict[name] = v
7232 nics.append(ndict)
7233 self.op.nics = nics
7235 if (self.op.hypervisor is None and
7236 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7237 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7238 if einfo.has_section(constants.INISECT_HYP):
7239 # use the export parameters but do not override the ones
7240 # specified by the user
7241 for name, value in einfo.items(constants.INISECT_HYP):
7242 if name not in self.op.hvparams:
7243 self.op.hvparams[name] = value
7245 if einfo.has_section(constants.INISECT_BEP):
7246 # use the parameters, without overriding
7247 for name, value in einfo.items(constants.INISECT_BEP):
7248 if name not in self.op.beparams:
7249 self.op.beparams[name] = value
7250 else:
7251 # try to read the parameters old style, from the main section
7252 for name in constants.BES_PARAMETERS:
7253 if (name not in self.op.beparams and
7254 einfo.has_option(constants.INISECT_INS, name)):
7255 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7257 if einfo.has_section(constants.INISECT_OSP):
7258 # use the parameters, without overriding
7259 for name, value in einfo.items(constants.INISECT_OSP):
7260 if name not in self.op.osparams:
7261 self.op.osparams[name] = value
7263 def _RevertToDefaults(self, cluster):
7264 """Revert the instance parameters to the default values.
7268 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7269 for name in self.op.hvparams.keys():
7270 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7271 del self.op.hvparams[name]
7273 be_defs = cluster.SimpleFillBE({})
7274 for name in self.op.beparams.keys():
7275 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7276 del self.op.beparams[name]
7278 nic_defs = cluster.SimpleFillNIC({})
7279 for nic in self.op.nics:
7280 for name in constants.NICS_PARAMETERS:
7281 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7282 del nic[name]
7284 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7285 for name in self.op.osparams.keys():
7286 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7287 del self.op.osparams[name]
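# Example (editorial): with identify_defaults set, a beparams value that
# happens to equal the current cluster default is dropped here, so the stored
# instance keeps following future changes of the cluster-wide default instead
# of pinning the value it was created with.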
7289 def CheckPrereq(self):
7290 """Check prerequisites.
7293 if self.op.mode == constants.INSTANCE_IMPORT:
7294 export_info = self._ReadExportInfo()
7295 self._ReadExportParams(export_info)
7297 _CheckDiskTemplate(self.op.disk_template)
7299 if (not self.cfg.GetVGName() and
7300 self.op.disk_template not in constants.DTS_NOT_LVM):
7301 raise errors.OpPrereqError("Cluster does not support lvm-based"
7302 " instances", errors.ECODE_STATE)
7304 if self.op.hypervisor is None:
7305 self.op.hypervisor = self.cfg.GetHypervisorType()
7307 cluster = self.cfg.GetClusterInfo()
7308 enabled_hvs = cluster.enabled_hypervisors
7309 if self.op.hypervisor not in enabled_hvs:
7310 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7311 " cluster (%s)" % (self.op.hypervisor,
7312 ",".join(enabled_hvs)),
7315 # check hypervisor parameter syntax (locally)
7316 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7317 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7318 self.op.hvparams)
7319 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7320 hv_type.CheckParameterSyntax(filled_hvp)
7321 self.hv_full = filled_hvp
7322 # check that we don't specify global parameters on an instance
7323 _CheckGlobalHvParams(self.op.hvparams)
7325 # fill and remember the beparams dict
7326 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7327 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7329 # build os parameters
7330 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7332 # now that hvp/bep are in final format, let's reset to defaults,
7333 # if told to do so
7334 if self.op.identify_defaults:
7335 self._RevertToDefaults(cluster)
7337 # NIC buildup
7338 self.nics = []
7339 for idx, nic in enumerate(self.op.nics):
7340 nic_mode_req = nic.get("mode", None)
7341 nic_mode = nic_mode_req
7342 if nic_mode is None:
7343 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7345 # in routed mode, for the first nic, the default ip is 'auto'
7346 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7347 default_ip_mode = constants.VALUE_AUTO
7349 default_ip_mode = constants.VALUE_NONE
7351 # ip validity checks
7352 ip = nic.get("ip", default_ip_mode)
7353 if ip is None or ip.lower() == constants.VALUE_NONE:
7354 nic_ip = None
7355 elif ip.lower() == constants.VALUE_AUTO:
7356 if not self.op.name_check:
7357 raise errors.OpPrereqError("IP address set to auto but name checks"
7358 " have been skipped",
7359 errors.ECODE_INVAL)
7360 nic_ip = self.hostname1.ip
7361 else:
7362 if not netutils.IPAddress.IsValid(ip):
7363 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7364 errors.ECODE_INVAL)
7365 nic_ip = ip
7367 # TODO: check the ip address for uniqueness
7368 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7369 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7370 errors.ECODE_INVAL)
7372 # MAC address verification
7373 mac = nic.get("mac", constants.VALUE_AUTO)
7374 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7375 mac = utils.NormalizeAndValidateMac(mac)
7377 try:
7378 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7379 except errors.ReservationError:
7380 raise errors.OpPrereqError("MAC address %s already in use"
7381 " in cluster" % mac,
7382 errors.ECODE_NOTUNIQUE)
7384 # bridge verification
7385 bridge = nic.get("bridge", None)
7386 link = nic.get("link", None)
7388 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7389 " at the same time", errors.ECODE_INVAL)
7390 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7391 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7398 nicparams[constants.NIC_MODE] = nic_mode_req
7400 nicparams[constants.NIC_LINK] = link
7402 check_params = cluster.SimpleFillNIC(nicparams)
7403 objects.NIC.CheckParameterSyntax(check_params)
7404 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7406 # disk checks/pre-build
7408 for disk in self.op.disks:
7409 mode = disk.get("mode", constants.DISK_RDWR)
7410 if mode not in constants.DISK_ACCESS_SET:
7411 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7412 mode, errors.ECODE_INVAL)
7413 size = disk.get("size", None)
7415 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7418 except (TypeError, ValueError):
7419 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7421 new_disk = {"size": size, "mode": mode}
7423 new_disk["adopt"] = disk["adopt"]
7424 self.disks.append(new_disk)
7426 if self.op.mode == constants.INSTANCE_IMPORT:
7428 # Check that the new instance doesn't have less disks than the export
7429 instance_disks = len(self.disks)
7430 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7431 if instance_disks < export_disks:
7432 raise errors.OpPrereqError("Not enough disks to import."
7433 " (instance: %d, export: %d)" %
7434 (instance_disks, export_disks),
7435 errors.ECODE_INVAL)
7437 disk_images = []
7438 for idx in range(export_disks):
7439 option = 'disk%d_dump' % idx
7440 if export_info.has_option(constants.INISECT_INS, option):
7441 # FIXME: are the old os-es, disk sizes, etc. useful?
7442 export_name = export_info.get(constants.INISECT_INS, option)
7443 image = utils.PathJoin(self.op.src_path, export_name)
7444 disk_images.append(image)
7445 else:
7446 disk_images.append(False)
7448 self.src_images = disk_images
7450 old_name = export_info.get(constants.INISECT_INS, 'name')
7451 try:
7452 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7453 except (TypeError, ValueError), err:
7454 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7455 " an integer: %s" % str(err),
7456 errors.ECODE_INVAL)
7457 if self.op.instance_name == old_name:
7458 for idx, nic in enumerate(self.nics):
7459 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7460 nic_mac_ini = 'nic%d_mac' % idx
7461 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7463 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7465 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7466 if self.op.ip_check:
7467 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7468 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7469 (self.check_ip, self.op.instance_name),
7470 errors.ECODE_NOTUNIQUE)
7472 #### mac address generation
7473 # By generating here the mac address both the allocator and the hooks get
7474 # the real final mac address rather than the 'auto' or 'generate' value.
7475 # There is a race condition between the generation and the instance object
7476 # creation, which means that we know the mac is valid now, but we're not
7477 # sure it will be when we actually add the instance. If things go bad
7478 # adding the instance will abort because of a duplicate mac, and the
7479 # creation job will fail.
7480 for nic in self.nics:
7481 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7482 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7486 if self.op.iallocator is not None:
7487 self._RunAllocator()
7489 #### node related checks
7491 # check primary node
7492 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7493 assert self.pnode is not None, \
7494 "Cannot retrieve locked node %s" % self.op.pnode
7496 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7497 pnode.name, errors.ECODE_STATE)
7499 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7500 pnode.name, errors.ECODE_STATE)
7501 if not pnode.vm_capable:
7502 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7503 " '%s'" % pnode.name, errors.ECODE_STATE)
7505 self.secondaries = []
7507 # mirror node verification
7508 if self.op.disk_template in constants.DTS_NET_MIRROR:
7509 if self.op.snode == pnode.name:
7510 raise errors.OpPrereqError("The secondary node cannot be the"
7511 " primary node.", errors.ECODE_INVAL)
7512 _CheckNodeOnline(self, self.op.snode)
7513 _CheckNodeNotDrained(self, self.op.snode)
7514 _CheckNodeVmCapable(self, self.op.snode)
7515 self.secondaries.append(self.op.snode)
7517 nodenames = [pnode.name] + self.secondaries
7519 req_size = _ComputeDiskSize(self.op.disk_template,
7520 self.disks)
7522 # Check lv size requirements, if not adopting
7523 if req_size is not None and not self.adopt_disks:
7524 _CheckNodesFreeDisk(self, nodenames, req_size)
7526 if self.adopt_disks: # instead, we must check the adoption data
7527 all_lvs = set([i["adopt"] for i in self.disks])
7528 if len(all_lvs) != len(self.disks):
7529 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7530 errors.ECODE_INVAL)
7531 for lv_name in all_lvs:
7532 try:
7533 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7534 except errors.ReservationError:
7535 raise errors.OpPrereqError("LV named %s used by another instance" %
7536 lv_name, errors.ECODE_NOTUNIQUE)
7538 node_lvs = self.rpc.call_lv_list([pnode.name],
7539 self.cfg.GetVGName())[pnode.name]
7540 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7541 node_lvs = node_lvs.payload
7542 delta = all_lvs.difference(node_lvs.keys())
7543 if delta:
7544 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7545 utils.CommaJoin(delta),
7546 errors.ECODE_INVAL)
7547 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7548 if online_lvs:
7549 raise errors.OpPrereqError("Online logical volumes found, cannot"
7550 " adopt: %s" % utils.CommaJoin(online_lvs),
7551 errors.ECODE_STATE)
7552 # update the size of disk based on what is found
7553 for dsk in self.disks:
7554 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7556 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7558 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7559 # check OS parameters (remotely)
7560 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7562 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7564 # memory check on primary node
7566 _CheckNodeFreeMemory(self, self.pnode.name,
7567 "creating instance %s" % self.op.instance_name,
7568 self.be_full[constants.BE_MEMORY],
7569 self.op.hypervisor)
7571 self.dry_run_result = list(nodenames)
7573 def Exec(self, feedback_fn):
7574 """Create and add the instance to the cluster.
7577 instance = self.op.instance_name
7578 pnode_name = self.pnode.name
7580 ht_kind = self.op.hypervisor
7581 if ht_kind in constants.HTS_REQ_PORT:
7582 network_port = self.cfg.AllocatePort()
7586 if constants.ENABLE_FILE_STORAGE:
7587 # this is needed because os.path.join does not accept None arguments
7588 if self.op.file_storage_dir is None:
7589 string_file_storage_dir = ""
7591 string_file_storage_dir = self.op.file_storage_dir
7593 # build the full file storage dir path
7594 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7595 string_file_storage_dir, instance)
7597 file_storage_dir = ""
7599 disks = _GenerateDiskTemplate(self,
7600 self.op.disk_template,
7601 instance, pnode_name,
7602 self.secondaries,
7603 self.disks,
7604 file_storage_dir,
7605 self.op.file_driver,
7606 0)
7608 iobj = objects.Instance(name=instance, os=self.op.os_type,
7609 primary_node=pnode_name,
7610 nics=self.nics, disks=disks,
7611 disk_template=self.op.disk_template,
7612 admin_up=False,
7613 network_port=network_port,
7614 beparams=self.op.beparams,
7615 hvparams=self.op.hvparams,
7616 hypervisor=self.op.hypervisor,
7617 osparams=self.op.osparams,
7618 )
7620 if self.adopt_disks:
7621 # rename LVs to the newly-generated names; we need to construct
7622 # 'fake' LV disks with the old data, plus the new unique_id
7623 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7624 rename_to = []
7625 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7626 rename_to.append(t_dsk.logical_id)
7627 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7628 self.cfg.SetDiskID(t_dsk, pnode_name)
7629 result = self.rpc.call_blockdev_rename(pnode_name,
7630 zip(tmp_disks, rename_to))
7631 result.Raise("Failed to rename adopted LVs")
7633 feedback_fn("* creating instance disks...")
7635 _CreateDisks(self, iobj)
7636 except errors.OpExecError:
7637 self.LogWarning("Device creation failed, reverting...")
7639 _RemoveDisks(self, iobj)
7641 self.cfg.ReleaseDRBDMinors(instance)
7644 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7645 feedback_fn("* wiping instance disks...")
7646 try:
7647 _WipeDisks(self, iobj)
7648 except errors.OpExecError:
7649 self.LogWarning("Device wiping failed, reverting...")
7650 try:
7651 _RemoveDisks(self, iobj)
7652 finally:
7653 self.cfg.ReleaseDRBDMinors(instance)
7654 raise
7656 feedback_fn("adding instance %s to cluster config" % instance)
7658 self.cfg.AddInstance(iobj, self.proc.GetECId())
7660 # Declare that we don't want to remove the instance lock anymore, as we've
7661 # added the instance to the config
7662 del self.remove_locks[locking.LEVEL_INSTANCE]
7663 # Unlock all the nodes
7664 if self.op.mode == constants.INSTANCE_IMPORT:
7665 nodes_keep = [self.op.src_node]
7666 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7667 if node != self.op.src_node]
7668 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7669 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7670 else:
7671 self.context.glm.release(locking.LEVEL_NODE)
7672 del self.acquired_locks[locking.LEVEL_NODE]
7674 if self.op.wait_for_sync:
7675 disk_abort = not _WaitForSync(self, iobj)
7676 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7677 # make sure the disks are not degraded (still sync-ing is ok)
7679 feedback_fn("* checking mirrors status")
7680 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7685 _RemoveDisks(self, iobj)
7686 self.cfg.RemoveInstance(iobj.name)
7687 # Make sure the instance lock gets removed
7688 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7689 raise errors.OpExecError("There are some degraded disks for"
7690 " this instance")
7692 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7693 if self.op.mode == constants.INSTANCE_CREATE:
7694 if not self.op.no_install:
7695 feedback_fn("* running the instance OS create scripts...")
7696 # FIXME: pass debug option from opcode to backend
7697 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7698 self.op.debug_level)
7699 result.Raise("Could not add os for instance %s"
7700 " on node %s" % (instance, pnode_name))
7702 elif self.op.mode == constants.INSTANCE_IMPORT:
7703 feedback_fn("* running the instance OS import scripts...")
7705 transfers = []
7707 for idx, image in enumerate(self.src_images):
7708 if not image:
7709 continue
7711 # FIXME: pass debug option from opcode to backend
7712 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7713 constants.IEIO_FILE, (image, ),
7714 constants.IEIO_SCRIPT,
7715 (iobj.disks[idx], idx),
7716 None)
7717 transfers.append(dt)
7719 import_result = \
7720 masterd.instance.TransferInstanceData(self, feedback_fn,
7721 self.op.src_node, pnode_name,
7722 self.pnode.secondary_ip,
7723 iobj, transfers)
7724 if not compat.all(import_result):
7725 self.LogWarning("Some disks for instance %s on node %s were not"
7726 " imported successfully" % (instance, pnode_name))
7728 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7729 feedback_fn("* preparing remote import...")
7730 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7731 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7733 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7734 self.source_x509_ca,
7735 self._cds, timeouts)
7736 if not compat.all(disk_results):
7737 # TODO: Should the instance still be started, even if some disks
7738 # failed to import (valid for local imports, too)?
7739 self.LogWarning("Some disks for instance %s on node %s were not"
7740 " imported successfully" % (instance, pnode_name))
7742 # Run rename script on newly imported instance
7743 assert iobj.name == instance
7744 feedback_fn("Running rename script for %s" % instance)
7745 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7746 self.source_instance_name,
7747 self.op.debug_level)
7749 self.LogWarning("Failed to run rename script for %s on node"
7750 " %s: %s" % (instance, pnode_name, result.fail_msg))
7752 else:
7753 # also checked in the prereq part
7754 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7755 % self.op.mode)
7757 if self.op.start:
7758 iobj.admin_up = True
7759 self.cfg.Update(iobj, feedback_fn)
7760 logging.info("Starting instance %s on node %s", instance, pnode_name)
7761 feedback_fn("* starting instance...")
7762 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7763 result.Raise("Could not start instance")
7765 return list(iobj.all_nodes)
7768 class LUConnectConsole(NoHooksLU):
7769 """Connect to an instance's console.
7771 This is somewhat special in that it returns the command line that
7772 you need to run on the master node in order to connect to the
7773 console.
7775 """
7776 _OP_PARAMS = [
7777 _PInstanceName
7778 ]
7779 REQ_BGL = False
7781 def ExpandNames(self):
7782 self._ExpandAndLockInstance()
7784 def CheckPrereq(self):
7785 """Check prerequisites.
7787 This checks that the instance is in the cluster.
7789 """
7790 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7791 assert self.instance is not None, \
7792 "Cannot retrieve locked instance %s" % self.op.instance_name
7793 _CheckNodeOnline(self, self.instance.primary_node)
7795 def Exec(self, feedback_fn):
7796 """Connect to the console of an instance
7799 instance = self.instance
7800 node = instance.primary_node
7802 node_insts = self.rpc.call_instance_list([node],
7803 [instance.hypervisor])[node]
7804 node_insts.Raise("Can't get node information from %s" % node)
7806 if instance.name not in node_insts.payload:
7807 if instance.admin_up:
7808 state = "ERROR_down"
7810 state = "ADMIN_down"
7811 raise errors.OpExecError("Instance %s is not running (state %s)" %
7812 (instance.name, state))
7814 logging.debug("Connecting to console of %s on %s", instance.name, node)
7816 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7817 cluster = self.cfg.GetClusterInfo()
7818 # beparams and hvparams are passed separately, to avoid editing the
7819 # instance and then saving the defaults in the instance itself.
7820 hvparams = cluster.FillHV(instance)
7821 beparams = cluster.FillBE(instance)
7822 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7825 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
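# For illustration only (the exact string depends on the hypervisor): on a
# Xen cluster the returned command would look roughly like
#   ssh -t root@node1.example.com "xm console instance1.example.com"
# i.e. an interactive SSH to the primary node that runs the hypervisor's
# own console command there.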
7828 class LUReplaceDisks(LogicalUnit):
7829 """Replace the disks of an instance.
7832 HPATH = "mirrors-replace"
7833 HTYPE = constants.HTYPE_INSTANCE
7834 _OP_PARAMS = [
7835 _PInstanceName,
7836 ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
7837 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
7838 ("remote_node", None, ht.TMaybeString),
7839 ("iallocator", None, ht.TMaybeString),
7840 ("early_release", False, ht.TBool),
7844 def CheckArguments(self):
7845 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7846 self.op.iallocator)
7848 def ExpandNames(self):
7849 self._ExpandAndLockInstance()
7851 if self.op.iallocator is not None:
7852 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7854 elif self.op.remote_node is not None:
7855 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7856 self.op.remote_node = remote_node
7858 # Warning: do not remove the locking of the new secondary here
7859 # unless DRBD8.AddChildren is changed to work in parallel;
7860 # currently it doesn't since parallel invocations of
7861 # FindUnusedMinor will conflict
7862 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7863 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7865 else:
7866 self.needed_locks[locking.LEVEL_NODE] = []
7867 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7869 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7870 self.op.iallocator, self.op.remote_node,
7871 self.op.disks, False, self.op.early_release)
7873 self.tasklets = [self.replacer]
7875 def DeclareLocks(self, level):
7876 # If we're not already locking all nodes in the set we have to declare the
7877 # instance's primary/secondary nodes.
7878 if (level == locking.LEVEL_NODE and
7879 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7880 self._LockInstancesNodes()
7882 def BuildHooksEnv(self):
7883 """Build hooks env.
7885 This runs on the master, the primary and all the secondaries.
7887 """
7888 instance = self.replacer.instance
7889 env = {
7890 "MODE": self.op.mode,
7891 "NEW_SECONDARY": self.op.remote_node,
7892 "OLD_SECONDARY": instance.secondary_nodes[0],
7893 }
7894 env.update(_BuildInstanceHookEnvByObject(self, instance))
7895 nl = [
7896 self.cfg.GetMasterNode(),
7897 instance.primary_node,
7898 ]
7899 if self.op.remote_node is not None:
7900 nl.append(self.op.remote_node)
7901 return env, nl, nl
7904 class TLReplaceDisks(Tasklet):
7905 """Replaces disks for an instance.
7907 Note: Locking is not within the scope of this class.
7909 """
7910 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7911 disks, delay_iallocator, early_release):
7912 """Initializes this class.
7915 Tasklet.__init__(self, lu)
7917 # Parameters
7918 self.instance_name = instance_name
7919 self.mode = mode
7920 self.iallocator_name = iallocator_name
7921 self.remote_node = remote_node
7922 self.disks = disks
7923 self.delay_iallocator = delay_iallocator
7924 self.early_release = early_release
7926 # Runtime data
7927 self.instance = None
7928 self.new_node = None
7929 self.target_node = None
7930 self.other_node = None
7931 self.remote_node_info = None
7932 self.node_secondary_ip = None
7934 @staticmethod
7935 def CheckArguments(mode, remote_node, iallocator):
7936 """Helper function for users of this class.
7938 """
7939 # check for valid parameter combination
7940 if mode == constants.REPLACE_DISK_CHG:
7941 if remote_node is None and iallocator is None:
7942 raise errors.OpPrereqError("When changing the secondary either an"
7943 " iallocator script must be used or the"
7944 " new node given", errors.ECODE_INVAL)
7946 if remote_node is not None and iallocator is not None:
7947 raise errors.OpPrereqError("Give either the iallocator or the new"
7948 " secondary, not both", errors.ECODE_INVAL)
7950 elif remote_node is not None or iallocator is not None:
7951 # Not replacing the secondary
7952 raise errors.OpPrereqError("The iallocator and new node options can"
7953 " only be used when changing the"
7954 " secondary node", errors.ECODE_INVAL)
7956 @staticmethod
7957 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7958 """Compute a new secondary node using an IAllocator.
7960 """
7961 ial = IAllocator(lu.cfg, lu.rpc,
7962 mode=constants.IALLOCATOR_MODE_RELOC,
7963 name=instance_name,
7964 relocate_from=relocate_from)
7966 ial.Run(iallocator_name)
7968 if not ial.success:
7969 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7970 " %s" % (iallocator_name, ial.info),
7971 errors.ECODE_NORES)
7973 if len(ial.result) != ial.required_nodes:
7974 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7975 " of nodes (%s), required %s" %
7977 len(ial.result), ial.required_nodes),
7980 remote_node_name = ial.result[0]
7982 lu.LogInfo("Selected new secondary for instance '%s': %s",
7983 instance_name, remote_node_name)
7985 return remote_node_name
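# Assumed shape of a successful relocation answer (illustration only):
# ial.result is a list of node names whose length must equal
# ial.required_nodes, e.g. ["node3.example.com"]; element 0 becomes the
# new secondary returned above.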
7987 def _FindFaultyDisks(self, node_name):
7988 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7989 node_name, True)
7991 def CheckPrereq(self):
7992 """Check prerequisites.
7994 This checks that the instance is in the cluster.
7997 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7998 assert instance is not None, \
7999 "Cannot retrieve locked instance %s" % self.instance_name
8001 if instance.disk_template != constants.DT_DRBD8:
8002 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8003 " instances", errors.ECODE_INVAL)
8005 if len(instance.secondary_nodes) != 1:
8006 raise errors.OpPrereqError("The instance has a strange layout,"
8007 " expected one secondary but found %d" %
8008 len(instance.secondary_nodes),
8009 errors.ECODE_FAULT)
8011 if not self.delay_iallocator:
8012 self._CheckPrereq2()
8014 def _CheckPrereq2(self):
8015 """Check prerequisites, second part.
8017 This function should always be part of CheckPrereq. It was separated and is
8018 now called from Exec because during node evacuation iallocator was only
8019 called with an unmodified cluster model, not taking planned changes into
8020 account.
8022 """
8023 instance = self.instance
8024 secondary_node = instance.secondary_nodes[0]
8026 if self.iallocator_name is None:
8027 remote_node = self.remote_node
8028 else:
8029 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8030 instance.name, instance.secondary_nodes)
8032 if remote_node is not None:
8033 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8034 assert self.remote_node_info is not None, \
8035 "Cannot retrieve locked node %s" % remote_node
8036 else:
8037 self.remote_node_info = None
8039 if remote_node == self.instance.primary_node:
8040 raise errors.OpPrereqError("The specified node is the primary node of"
8041 " the instance.", errors.ECODE_INVAL)
8043 if remote_node == secondary_node:
8044 raise errors.OpPrereqError("The specified node is already the"
8045 " secondary node of the instance.",
8048 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8049 constants.REPLACE_DISK_CHG):
8050 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8053 if self.mode == constants.REPLACE_DISK_AUTO:
8054 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8055 faulty_secondary = self._FindFaultyDisks(secondary_node)
8057 if faulty_primary and faulty_secondary:
8058 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8059 " one node and can not be repaired"
8060 " automatically" % self.instance_name,
8064 self.disks = faulty_primary
8065 self.target_node = instance.primary_node
8066 self.other_node = secondary_node
8067 check_nodes = [self.target_node, self.other_node]
8068 elif faulty_secondary:
8069 self.disks = faulty_secondary
8070 self.target_node = secondary_node
8071 self.other_node = instance.primary_node
8072 check_nodes = [self.target_node, self.other_node]
8073 else:
8074 self.disks = []
8075 check_nodes = []
8077 else:
8078 # Non-automatic modes
8079 if self.mode == constants.REPLACE_DISK_PRI:
8080 self.target_node = instance.primary_node
8081 self.other_node = secondary_node
8082 check_nodes = [self.target_node, self.other_node]
8084 elif self.mode == constants.REPLACE_DISK_SEC:
8085 self.target_node = secondary_node
8086 self.other_node = instance.primary_node
8087 check_nodes = [self.target_node, self.other_node]
8089 elif self.mode == constants.REPLACE_DISK_CHG:
8090 self.new_node = remote_node
8091 self.other_node = instance.primary_node
8092 self.target_node = secondary_node
8093 check_nodes = [self.new_node, self.other_node]
8095 _CheckNodeNotDrained(self.lu, remote_node)
8096 _CheckNodeVmCapable(self.lu, remote_node)
8098 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8099 assert old_node_info is not None
8100 if old_node_info.offline and not self.early_release:
8101 # doesn't make sense to delay the release
8102 self.early_release = True
8103 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8104 " early-release mode", secondary_node)
8106 else:
8107 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8108 self.mode)
8110 # If not specified all disks should be replaced
8111 if not self.disks:
8112 self.disks = range(len(self.instance.disks))
8114 for node in check_nodes:
8115 _CheckNodeOnline(self.lu, node)
8117 # Check whether disks are valid
8118 for disk_idx in self.disks:
8119 instance.FindDisk(disk_idx)
8121 # Get secondary node IP addresses
8122 node_2nd_ip = {}
8124 for node_name in [self.target_node, self.other_node, self.new_node]:
8125 if node_name is not None:
8126 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8128 self.node_secondary_ip = node_2nd_ip
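# For illustration, node_2nd_ip ends up as a plain name -> IP mapping,
# e.g. {"node1.example.com": "192.0.2.1", "node2.example.com": "192.0.2.2"}
# (example addresses); these secondary IPs are the endpoints used for the
# DRBD replication traffic during the replacement.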
8130 def Exec(self, feedback_fn):
8131 """Execute disk replacement.
8133 This dispatches the disk replacement to the appropriate handler.
8136 if self.delay_iallocator:
8137 self._CheckPrereq2()
8140 feedback_fn("No disks need replacement")
8143 feedback_fn("Replacing disk(s) %s for %s" %
8144 (utils.CommaJoin(self.disks), self.instance.name))
8146 activate_disks = (not self.instance.admin_up)
8148 # Activate the instance disks if we're replacing them on a down instance
8149 if activate_disks:
8150 _StartInstanceDisks(self.lu, self.instance, True)
8152 try:
8153 # Should we replace the secondary node?
8154 if self.new_node is not None:
8155 fn = self._ExecDrbd8Secondary
8156 else:
8157 fn = self._ExecDrbd8DiskOnly
8159 return fn(feedback_fn)
8161 finally:
8162 # Deactivate the instance disks if we're replacing them on a
8163 # down instance
8164 if activate_disks:
8165 _SafeShutdownInstanceDisks(self.lu, self.instance)
8167 def _CheckVolumeGroup(self, nodes):
8168 self.lu.LogInfo("Checking volume groups")
8170 vgname = self.cfg.GetVGName()
8172 # Make sure volume group exists on all involved nodes
8173 results = self.rpc.call_vg_list(nodes)
8174 if not results:
8175 raise errors.OpExecError("Can't list volume groups on the nodes")
8177 for node in nodes:
8178 res = results[node]
8179 res.Raise("Error checking node %s" % node)
8180 if vgname not in res.payload:
8181 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8182 (vgname, node))
8184 def _CheckDisksExistence(self, nodes):
8185 # Check disk existence
8186 for idx, dev in enumerate(self.instance.disks):
8187 if idx not in self.disks:
8188 continue
8190 for node in nodes:
8191 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8192 self.cfg.SetDiskID(dev, node)
8194 result = self.rpc.call_blockdev_find(node, dev)
8196 msg = result.fail_msg
8197 if msg or not result.payload:
8198 if not msg:
8199 msg = "disk not found"
8200 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8201 (idx, node, msg))
8203 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8204 for idx, dev in enumerate(self.instance.disks):
8205 if idx not in self.disks:
8206 continue
8208 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8209 (idx, node_name))
8211 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8212 ldisk=ldisk):
8213 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8214 " replace disks for instance %s" %
8215 (node_name, self.instance.name))
8217 def _CreateNewStorage(self, node_name):
8218 vgname = self.cfg.GetVGName()
8220 iv_names = {}
8221 for idx, dev in enumerate(self.instance.disks):
8222 if idx not in self.disks:
8223 continue
8225 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8227 self.cfg.SetDiskID(dev, node_name)
8229 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8230 names = _GenerateUniqueNames(self.lu, lv_names)
8232 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8233 logical_id=(vgname, names[0]))
8234 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8235 logical_id=(vgname, names[1]))
8237 new_lvs = [lv_data, lv_meta]
8238 old_lvs = dev.children
8239 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8241 # we pass force_create=True to force the LVM creation
8242 for new_lv in new_lvs:
8243 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8244 _GetInstanceInfoText(self.instance), False)
8246 return iv_names
8248 def _CheckDevices(self, node_name, iv_names):
8249 for name, (dev, _, _) in iv_names.iteritems():
8250 self.cfg.SetDiskID(dev, node_name)
8252 result = self.rpc.call_blockdev_find(node_name, dev)
8254 msg = result.fail_msg
8255 if msg or not result.payload:
8256 if not msg:
8257 msg = "disk not found"
8258 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8259 (name, msg))
8261 if result.payload.is_degraded:
8262 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8264 def _RemoveOldStorage(self, node_name, iv_names):
8265 for name, (_, old_lvs, _) in iv_names.iteritems():
8266 self.lu.LogInfo("Remove logical volumes for %s" % name)
8269 self.cfg.SetDiskID(lv, node_name)
8271 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8273 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8274 hint="remove unused LVs manually")
8276 def _ReleaseNodeLock(self, node_name):
8277 """Releases the lock for a given node."""
8278 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8280 def _ExecDrbd8DiskOnly(self, feedback_fn):
8281 """Replace a disk on the primary or secondary for DRBD 8.
8283 The algorithm for replace is quite complicated:
8285 1. for each disk to be replaced:
8287 1. create new LVs on the target node with unique names
8288 1. detach old LVs from the drbd device
8289 1. rename old LVs to name_replaced.<time_t>
8290 1. rename new LVs to old LVs
8291 1. attach the new LVs (with the old names now) to the drbd device
8293 1. wait for sync across all devices
8295 1. for each modified disk:
8297 1. remove old LVs (which have the name name_replaced.<time_t>)
8299 Failures are not very well handled.
8301 """
8303 steps_total = 6
8304 # Step: check device activation
8305 self.lu.LogStep(1, steps_total, "Check device existence")
8306 self._CheckDisksExistence([self.other_node, self.target_node])
8307 self._CheckVolumeGroup([self.target_node, self.other_node])
8309 # Step: check other node consistency
8310 self.lu.LogStep(2, steps_total, "Check peer consistency")
8311 self._CheckDisksConsistency(self.other_node,
8312 self.other_node == self.instance.primary_node,
8313 False)
8315 # Step: create new storage
8316 self.lu.LogStep(3, steps_total, "Allocate new storage")
8317 iv_names = self._CreateNewStorage(self.target_node)
8319 # Step: for each lv, detach+rename*2+attach
8320 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8321 for dev, old_lvs, new_lvs in iv_names.itervalues():
8322 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8324 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8325 old_lvs)
8326 result.Raise("Can't detach drbd from local storage on node"
8327 " %s for device %s" % (self.target_node, dev.iv_name))
8329 #cfg.Update(instance)
8331 # ok, we created the new LVs, so now we know we have the needed
8332 # storage; as such, we proceed on the target node to rename
8333 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8334 # using the assumption that logical_id == physical_id (which in
8335 # turn is the unique_id on that node)
8337 # FIXME(iustin): use a better name for the replaced LVs
8338 temp_suffix = int(time.time())
8339 ren_fn = lambda d, suff: (d.physical_id[0],
8340 d.physical_id[1] + "_replaced-%s" % suff)
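# Illustration of the rename dance (hypothetical LV names): with
# temp_suffix 1357924680, an old LV ("xenvg", "disk0_data") is renamed to
# ("xenvg", "disk0_data_replaced-1357924680") and the freshly created LV
# (carrying a generated unique name) is then renamed to
# ("xenvg", "disk0_data"), so the drbd device reattaches to the old names.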
8342 # Build the rename list based on what LVs exist on the node
8343 rename_old_to_new = []
8344 for to_ren in old_lvs:
8345 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8346 if not result.fail_msg and result.payload:
8348 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8350 self.lu.LogInfo("Renaming the old LVs on the target node")
8351 result = self.rpc.call_blockdev_rename(self.target_node,
8352 rename_old_to_new)
8353 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8355 # Now we rename the new LVs to the old LVs
8356 self.lu.LogInfo("Renaming the new LVs on the target node")
8357 rename_new_to_old = [(new, old.physical_id)
8358 for old, new in zip(old_lvs, new_lvs)]
8359 result = self.rpc.call_blockdev_rename(self.target_node,
8360 rename_new_to_old)
8361 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8363 for old, new in zip(old_lvs, new_lvs):
8364 new.logical_id = old.logical_id
8365 self.cfg.SetDiskID(new, self.target_node)
8367 for disk in old_lvs:
8368 disk.logical_id = ren_fn(disk, temp_suffix)
8369 self.cfg.SetDiskID(disk, self.target_node)
8371 # Now that the new lvs have the old name, we can add them to the device
8372 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8373 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8374 new_lvs)
8375 msg = result.fail_msg
8376 if msg:
8377 for new_lv in new_lvs:
8378 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8379 new_lv).fail_msg
8380 if msg2:
8381 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8382 hint=("cleanup manually the unused logical"
8383 " volumes"))
8384 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8386 dev.children = new_lvs
8388 self.cfg.Update(self.instance, feedback_fn)
8390 cstep = 5
8391 if self.early_release:
8392 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8393 cstep += 1
8394 self._RemoveOldStorage(self.target_node, iv_names)
8395 # WARNING: we release both node locks here, do not do other RPCs
8396 # than WaitForSync to the primary node
8397 self._ReleaseNodeLock([self.target_node, self.other_node])
8400 # This can fail as the old devices are degraded and _WaitForSync
8401 # does a combined result over all disks, so we don't check its return value
8402 self.lu.LogStep(cstep, steps_total, "Sync devices")
8404 _WaitForSync(self.lu, self.instance)
8406 # Check all devices manually
8407 self._CheckDevices(self.instance.primary_node, iv_names)
8409 # Step: remove old storage
8410 if not self.early_release:
8411 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8412 cstep += 1
8413 self._RemoveOldStorage(self.target_node, iv_names)
8415 def _ExecDrbd8Secondary(self, feedback_fn):
8416 """Replace the secondary node for DRBD 8.
8418 The algorithm for replace is quite complicated:
8419 - for all disks of the instance:
8420 - create new LVs on the new node with same names
8421 - shutdown the drbd device on the old secondary
8422 - disconnect the drbd network on the primary
8423 - create the drbd device on the new secondary
8424 - network attach the drbd on the primary, using an artifice:
8425 the drbd code for Attach() will connect to the network if it
8426 finds a device which is connected to the good local disks but
8428 - wait for sync across all devices
8429 - remove all disks from the old secondary
8431 Failures are not very well handled.
8433 """
8435 steps_total = 6
8436 # Step: check device activation
8437 self.lu.LogStep(1, steps_total, "Check device existence")
8438 self._CheckDisksExistence([self.instance.primary_node])
8439 self._CheckVolumeGroup([self.instance.primary_node])
8441 # Step: check other node consistency
8442 self.lu.LogStep(2, steps_total, "Check peer consistency")
8443 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8445 # Step: create new storage
8446 self.lu.LogStep(3, steps_total, "Allocate new storage")
8447 for idx, dev in enumerate(self.instance.disks):
8448 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8449 (self.new_node, idx))
8450 # we pass force_create=True to force LVM creation
8451 for new_lv in dev.children:
8452 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8453 _GetInstanceInfoText(self.instance), False)
8455 # Step 4: drbd minors and drbd setup changes
8456 # after this, we must manually remove the drbd minors on both the
8457 # error and the success paths
8458 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8459 minors = self.cfg.AllocateDRBDMinor([self.new_node
8460 for dev in self.instance.disks],
8461 self.instance.name)
8462 logging.debug("Allocated minors %r", minors)
8465 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8466 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8467 (self.new_node, idx))
8468 # create new devices on new_node; note that we create two IDs:
8469 # one without port, so the drbd will be activated without
8470 # networking information on the new node at this stage, and one
8471 # with network, for the latter activation in step 4
8472 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8473 if self.instance.primary_node == o_node1:
8474 p_minor = o_minor1
8475 else:
8476 assert self.instance.primary_node == o_node2, "Three-node instance?"
8477 p_minor = o_minor2
8479 new_alone_id = (self.instance.primary_node, self.new_node, None,
8480 p_minor, new_minor, o_secret)
8481 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8482 p_minor, new_minor, o_secret)
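# As unpacked above, the logical_id of a DRBD8 disk is the 6-tuple
#   (node_A, node_B, port, minor_A, minor_B, secret)
# new_alone_id carries port=None so the device first comes up standalone
# on the new node; new_net_id keeps the port for the later network attach.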
8484 iv_names[idx] = (dev, dev.children, new_net_id)
8485 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8487 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8488 logical_id=new_alone_id,
8489 children=dev.children,
8492 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8493 _GetInstanceInfoText(self.instance), False)
8494 except errors.GenericError:
8495 self.cfg.ReleaseDRBDMinors(self.instance.name)
8496 raise
8498 # We have new devices, shutdown the drbd on the old secondary
8499 for idx, dev in enumerate(self.instance.disks):
8500 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8501 self.cfg.SetDiskID(dev, self.target_node)
8502 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8503 if msg:
8504 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8505 " node: %s" % (idx, msg),
8506 hint=("Please cleanup this device manually as"
8507 " soon as possible"))
8509 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8510 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8511 self.node_secondary_ip,
8512 self.instance.disks)\
8513 [self.instance.primary_node]
8515 msg = result.fail_msg
8516 if msg:
8517 # detaches didn't succeed (unlikely)
8518 self.cfg.ReleaseDRBDMinors(self.instance.name)
8519 raise errors.OpExecError("Can't detach the disks from the network on"
8520 " old node: %s" % (msg,))
8522 # if we managed to detach at least one, we update all the disks of
8523 # the instance to point to the new secondary
8524 self.lu.LogInfo("Updating instance configuration")
8525 for dev, _, new_logical_id in iv_names.itervalues():
8526 dev.logical_id = new_logical_id
8527 self.cfg.SetDiskID(dev, self.instance.primary_node)
8529 self.cfg.Update(self.instance, feedback_fn)
8531 # and now perform the drbd attach
8532 self.lu.LogInfo("Attaching primary drbds to new secondary"
8533 " (standalone => connected)")
8534 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8535 self.new_node],
8536 self.node_secondary_ip,
8537 self.instance.disks,
8538 self.instance.name,
8539 False)
8540 for to_node, to_result in result.items():
8541 msg = to_result.fail_msg
8542 if msg:
8543 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8544 to_node, msg,
8545 hint=("please do a gnt-instance info to see the"
8546 " status of disks"))
8547 cstep = 5
8548 if self.early_release:
8549 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8550 cstep += 1
8551 self._RemoveOldStorage(self.target_node, iv_names)
8552 # WARNING: we release all node locks here, do not do other RPCs
8553 # than WaitForSync to the primary node
8554 self._ReleaseNodeLock([self.instance.primary_node,
8555 self.target_node,
8556 self.new_node])
8559 # This can fail as the old devices are degraded and _WaitForSync
8560 # does a combined result over all disks, so we don't check its return value
8561 self.lu.LogStep(cstep, steps_total, "Sync devices")
8562 cstep += 1
8563 _WaitForSync(self.lu, self.instance)
8565 # Check all devices manually
8566 self._CheckDevices(self.instance.primary_node, iv_names)
8568 # Step: remove old storage
8569 if not self.early_release:
8570 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8571 self._RemoveOldStorage(self.target_node, iv_names)
8574 class LURepairNodeStorage(NoHooksLU):
8575 """Repairs the volume group on a node.
8580 ("storage_type", ht.NoDefault, _CheckStorageType),
8581 ("name", ht.NoDefault, ht.TNonEmptyString),
8582 ("ignore_consistency", False, ht.TBool),
8586 def CheckArguments(self):
8587 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8589 storage_type = self.op.storage_type
8591 if (constants.SO_FIX_CONSISTENCY not in
8592 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8593 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8594 " repaired" % storage_type,
8597 def ExpandNames(self):
8598 self.needed_locks = {
8599 locking.LEVEL_NODE: [self.op.node_name],
8600 }
8602 def _CheckFaultyDisks(self, instance, node_name):
8603 """Ensure faulty disks abort the opcode or at least warn."""
8604 try:
8605 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8606 node_name, True):
8607 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8608 " node '%s'" % (instance.name, node_name),
8609 errors.ECODE_STATE)
8610 except errors.OpPrereqError, err:
8611 if self.op.ignore_consistency:
8612 self.proc.LogWarning(str(err.args[0]))
8613 else:
8614 raise
8616 def CheckPrereq(self):
8617 """Check prerequisites.
8620 # Check whether any instance on this node has faulty disks
8621 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8622 if not inst.admin_up:
8623 continue
8624 check_nodes = set(inst.all_nodes)
8625 check_nodes.discard(self.op.node_name)
8626 for inst_node_name in check_nodes:
8627 self._CheckFaultyDisks(inst, inst_node_name)
8629 def Exec(self, feedback_fn):
8630 feedback_fn("Repairing storage unit '%s' on %s ..." %
8631 (self.op.name, self.op.node_name))
8633 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8634 result = self.rpc.call_storage_execute(self.op.node_name,
8635 self.op.storage_type, st_args,
8636 self.op.name,
8637 constants.SO_FIX_CONSISTENCY)
8638 result.Raise("Failed to repair storage unit '%s' on %s" %
8639 (self.op.name, self.op.node_name))
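# Hypothetical CLI invocation driving this LU (illustration only):
#   gnt-node repair-storage node1.example.com lvm-vg xenvg
# where storage_type and name map to the positional arguments; with
# ignore_consistency set, the faulty-disk check above only warns.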
8642 class LUNodeEvacuationStrategy(NoHooksLU):
8643 """Computes the node evacuation strategy.
8647 ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8648 ("remote_node", None, ht.TMaybeString),
8649 ("iallocator", None, ht.TMaybeString),
8653 def CheckArguments(self):
8654 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8656 def ExpandNames(self):
8657 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8658 self.needed_locks = locks = {}
8659 if self.op.remote_node is None:
8660 locks[locking.LEVEL_NODE] = locking.ALL_SET
8662 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8663 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8665 def Exec(self, feedback_fn):
8666 if self.op.remote_node is not None:
8667 instances = []
8668 for node in self.op.nodes:
8669 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8670 result = []
8671 for i in instances:
8672 if i.primary_node == self.op.remote_node:
8673 raise errors.OpPrereqError("Node %s is the primary node of"
8674 " instance %s, cannot use it as"
8675 " secondary" %
8676 (self.op.remote_node, i.name),
8677 errors.ECODE_INVAL)
8678 result.append([i.name, self.op.remote_node])
8679 else:
8680 ial = IAllocator(self.cfg, self.rpc,
8681 mode=constants.IALLOCATOR_MODE_MEVAC,
8682 evac_nodes=self.op.nodes)
8683 ial.Run(self.op.iallocator, validate=True)
8684 if not ial.success:
8685 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8686 errors.ECODE_NORES)
8687 result = ial.result
8688 return result
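# For the explicit remote_node case the returned strategy is a list of
# [instance_name, new_secondary] pairs, e.g. (illustration only):
#   [["inst1.example.com", "node4.example.com"],
#    ["inst2.example.com", "node4.example.com"]]
# the iallocator branch returns the allocator's own result list, assumed
# to be of a compatible shape.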
8691 class LUGrowDisk(LogicalUnit):
8692 """Grow a disk of an instance.
8696 HTYPE = constants.HTYPE_INSTANCE
8699 ("disk", ht.NoDefault, ht.TInt),
8700 ("amount", ht.NoDefault, ht.TInt),
8701 ("wait_for_sync", True, ht.TBool),
8705 def ExpandNames(self):
8706 self._ExpandAndLockInstance()
8707 self.needed_locks[locking.LEVEL_NODE] = []
8708 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8710 def DeclareLocks(self, level):
8711 if level == locking.LEVEL_NODE:
8712 self._LockInstancesNodes()
8714 def BuildHooksEnv(self):
8715 """Build hooks env.
8717 This runs on the master, the primary and all the secondaries.
8719 """
8720 env = {
8721 "DISK": self.op.disk,
8722 "AMOUNT": self.op.amount,
8723 }
8724 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8725 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8726 return env, nl, nl
8728 def CheckPrereq(self):
8729 """Check prerequisites.
8731 This checks that the instance is in the cluster.
8734 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8735 assert instance is not None, \
8736 "Cannot retrieve locked instance %s" % self.op.instance_name
8737 nodenames = list(instance.all_nodes)
8738 for node in nodenames:
8739 _CheckNodeOnline(self, node)
8741 self.instance = instance
8743 if instance.disk_template not in constants.DTS_GROWABLE:
8744 raise errors.OpPrereqError("Instance's disk layout does not support"
8745 " growing.", errors.ECODE_INVAL)
8747 self.disk = instance.FindDisk(self.op.disk)
8749 if instance.disk_template != constants.DT_FILE:
8750 # TODO: check the free disk space for file, when that feature will be
8751 # supported
8752 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8754 def Exec(self, feedback_fn):
8755 """Execute disk grow.
8758 instance = self.instance
8761 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8763 raise errors.OpExecError("Cannot activate block device to grow")
8765 for node in instance.all_nodes:
8766 self.cfg.SetDiskID(disk, node)
8767 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8768 result.Raise("Grow request failed to node %s" % node)
8770 # TODO: Rewrite code to work properly
8771 # DRBD goes into sync mode for a short amount of time after executing the
8772 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8773 # calling "resize" in sync mode fails. Sleeping for a short amount of
8774 # time is a work-around.
8775 time.sleep(5)
8777 disk.RecordGrow(self.op.amount)
8778 self.cfg.Update(instance, feedback_fn)
8779 if self.op.wait_for_sync:
8780 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8781 if disk_abort:
8782 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8783 " status.\nPlease check the instance.")
8784 if not instance.admin_up:
8785 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8786 elif not instance.admin_up:
8787 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8788 " not supposed to be running because no wait for"
8789 " sync mode was requested.")
8792 class LUQueryInstanceData(NoHooksLU):
8793 """Query runtime instance data.
8797 ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8798 ("static", False, ht.TBool),
8802 def ExpandNames(self):
8803 self.needed_locks = {}
8804 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8806 if self.op.instances:
8807 self.wanted_names = []
8808 for name in self.op.instances:
8809 full_name = _ExpandInstanceName(self.cfg, name)
8810 self.wanted_names.append(full_name)
8811 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8813 self.wanted_names = None
8814 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8816 self.needed_locks[locking.LEVEL_NODE] = []
8817 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8819 def DeclareLocks(self, level):
8820 if level == locking.LEVEL_NODE:
8821 self._LockInstancesNodes()
8823 def CheckPrereq(self):
8824 """Check prerequisites.
8826 This only checks the optional instance list against the existing names.
8829 if self.wanted_names is None:
8830 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8832 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8833 in self.wanted_names]
8835 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8836 """Returns the status of a block device
8839 if self.op.static or not node:
8842 self.cfg.SetDiskID(dev, node)
8844 result = self.rpc.call_blockdev_find(node, dev)
8848 result.Raise("Can't compute disk status for %s" % instance_name)
8850 status = result.payload
8854 return (status.dev_path, status.major, status.minor,
8855 status.sync_percent, status.estimated_time,
8856 status.is_degraded, status.ldisk_status)
8858 def _ComputeDiskStatus(self, instance, snode, dev):
8859 """Compute block device status.
8862 if dev.dev_type in constants.LDS_DRBD:
8863 # we change the snode then (otherwise we use the one passed in)
8864 if dev.logical_id[0] == instance.primary_node:
8865 snode = dev.logical_id[1]
8867 snode = dev.logical_id[0]
8869 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8870 instance.name, dev)
8871 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8873 if dev.children:
8874 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8875 for child in dev.children]
8876 else:
8877 dev_children = []
8879 data = {
8880 "iv_name": dev.iv_name,
8881 "dev_type": dev.dev_type,
8882 "logical_id": dev.logical_id,
8883 "physical_id": dev.physical_id,
8884 "pstatus": dev_pstatus,
8885 "sstatus": dev_sstatus,
8886 "children": dev_children,
8893 def Exec(self, feedback_fn):
8894 """Gather and return data"""
8897 cluster = self.cfg.GetClusterInfo()
8899 for instance in self.wanted_instances:
8900 if not self.op.static:
8901 remote_info = self.rpc.call_instance_info(instance.primary_node,
8903 instance.hypervisor)
8904 remote_info.Raise("Error checking node %s" % instance.primary_node)
8905 remote_info = remote_info.payload
8906 if remote_info and "state" in remote_info:
8907 remote_state = "up"
8908 else:
8909 remote_state = "down"
8910 else:
8911 remote_state = None
8912 if instance.admin_up:
8913 config_state = "up"
8914 else:
8915 config_state = "down"
8917 disks = [self._ComputeDiskStatus(instance, None, device)
8918 for device in instance.disks]
8920 idict = {
8921 "name": instance.name,
8922 "config_state": config_state,
8923 "run_state": remote_state,
8924 "pnode": instance.primary_node,
8925 "snodes": instance.secondary_nodes,
8927 # this happens to be the same format used for hooks
8928 "nics": _NICListToTuple(self, instance.nics),
8929 "disk_template": instance.disk_template,
8931 "hypervisor": instance.hypervisor,
8932 "network_port": instance.network_port,
8933 "hv_instance": instance.hvparams,
8934 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8935 "be_instance": instance.beparams,
8936 "be_actual": cluster.FillBE(instance),
8937 "os_instance": instance.osparams,
8938 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8939 "serial_no": instance.serial_no,
8940 "mtime": instance.mtime,
8941 "ctime": instance.ctime,
8942 "uuid": instance.uuid,
8943 }
8945 result[instance.name] = idict
8947 return result
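# Sketch of the returned mapping (keys as assembled above, values trimmed):
#   {"inst1.example.com": {"name": "inst1.example.com",
#                          "config_state": "up", "run_state": "up",
#                          "pnode": "node1.example.com", "snodes": [...],
#                          "disks": [...], ...}}
# This is the raw data that e.g. "gnt-instance info" renders for the user.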
8950 class LUSetInstanceParams(LogicalUnit):
8951 """Modifies an instances's parameters.
8954 HPATH = "instance-modify"
8955 HTYPE = constants.HTYPE_INSTANCE
8956 _OP_PARAMS = [
8957 _PInstanceName,
8958 ("nics", ht.EmptyList, ht.TList),
8959 ("disks", ht.EmptyList, ht.TList),
8960 ("beparams", ht.EmptyDict, ht.TDict),
8961 ("hvparams", ht.EmptyDict, ht.TDict),
8962 ("disk_template", None, ht.TMaybeString),
8963 ("remote_node", None, ht.TMaybeString),
8964 ("os_name", None, ht.TMaybeString),
8965 ("force_variant", False, ht.TBool),
8966 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
8971 def CheckArguments(self):
8972 if not (self.op.nics or self.op.disks or self.op.disk_template or
8973 self.op.hvparams or self.op.beparams or self.op.os_name):
8974 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8976 if self.op.hvparams:
8977 _CheckGlobalHvParams(self.op.hvparams)
8979 disk_addremove = 0
8981 for disk_op, disk_dict in self.op.disks:
8982 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8983 if disk_op == constants.DDM_REMOVE:
8984 disk_addremove += 1
8985 continue
8986 elif disk_op == constants.DDM_ADD:
8987 disk_addremove += 1
8988 else:
8989 if not isinstance(disk_op, int):
8990 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8991 if not isinstance(disk_dict, dict):
8992 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8993 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8995 if disk_op == constants.DDM_ADD:
8996 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8997 if mode not in constants.DISK_ACCESS_SET:
8998 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9000 size = disk_dict.get('size', None)
9001 if size is None:
9002 raise errors.OpPrereqError("Required disk parameter size missing",
9003 errors.ECODE_INVAL)
9004 try:
9005 size = int(size)
9006 except (TypeError, ValueError), err:
9007 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9008 str(err), errors.ECODE_INVAL)
9009 disk_dict['size'] = size
9010 else:
9011 # modification of disk
9012 if 'size' in disk_dict:
9013 raise errors.OpPrereqError("Disk size change not possible, use"
9014 " grow-disk", errors.ECODE_INVAL)
9016 if disk_addremove > 1:
9017 raise errors.OpPrereqError("Only one disk add or remove operation"
9018 " supported at a time", errors.ECODE_INVAL)
9020 if self.op.disks and self.op.disk_template is not None:
9021 raise errors.OpPrereqError("Disk template conversion and other disk"
9022 " changes not supported at the same time",
9025 if self.op.disk_template:
9026 _CheckDiskTemplate(self.op.disk_template)
9027 if (self.op.disk_template in constants.DTS_NET_MIRROR and
9028 self.op.remote_node is None):
9029 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9030 " one requires specifying a secondary node",
9035 for nic_op, nic_dict in self.op.nics:
9036 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9037 if nic_op == constants.DDM_REMOVE:
9038 nic_addremove += 1
9039 continue
9040 elif nic_op == constants.DDM_ADD:
9041 nic_addremove += 1
9042 else:
9043 if not isinstance(nic_op, int):
9044 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9045 if not isinstance(nic_dict, dict):
9046 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9047 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9049 # nic_dict should be a dict
9050 nic_ip = nic_dict.get('ip', None)
9051 if nic_ip is not None:
9052 if nic_ip.lower() == constants.VALUE_NONE:
9053 nic_dict['ip'] = None
9055 if not netutils.IPAddress.IsValid(nic_ip):
9056 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9059 nic_bridge = nic_dict.get('bridge', None)
9060 nic_link = nic_dict.get('link', None)
9061 if nic_bridge and nic_link:
9062 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9063 " at the same time", errors.ECODE_INVAL)
9064 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9065 nic_dict['bridge'] = None
9066 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9067 nic_dict['link'] = None
9069 if nic_op == constants.DDM_ADD:
9070 nic_mac = nic_dict.get('mac', None)
9071 if nic_mac is None:
9072 nic_dict['mac'] = constants.VALUE_AUTO
9074 if 'mac' in nic_dict:
9075 nic_mac = nic_dict['mac']
9076 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9077 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9079 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9080 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9081 " modifying an existing nic",
9084 if nic_addremove > 1:
9085 raise errors.OpPrereqError("Only one NIC add or remove operation"
9086 " supported at a time", errors.ECODE_INVAL)
9088 def ExpandNames(self):
9089 self._ExpandAndLockInstance()
9090 self.needed_locks[locking.LEVEL_NODE] = []
9091 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9093 def DeclareLocks(self, level):
9094 if level == locking.LEVEL_NODE:
9095 self._LockInstancesNodes()
9096 if self.op.disk_template and self.op.remote_node:
9097 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9098 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9100 def BuildHooksEnv(self):
9101 """Build hooks env.
9103 This runs on the master, primary and secondaries.
9105 """
9106 args = dict()
9107 if constants.BE_MEMORY in self.be_new:
9108 args['memory'] = self.be_new[constants.BE_MEMORY]
9109 if constants.BE_VCPUS in self.be_new:
9110 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9111 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9112 # information at all.
9113 if self.op.nics:
9114 args['nics'] = []
9115 nic_override = dict(self.op.nics)
9116 for idx, nic in enumerate(self.instance.nics):
9117 if idx in nic_override:
9118 this_nic_override = nic_override[idx]
9120 this_nic_override = {}
9121 if 'ip' in this_nic_override:
9122 ip = this_nic_override['ip']
9123 else:
9124 ip = nic.ip
9125 if 'mac' in this_nic_override:
9126 mac = this_nic_override['mac']
9127 else:
9128 mac = nic.mac
9129 if idx in self.nic_pnew:
9130 nicparams = self.nic_pnew[idx]
9132 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9133 mode = nicparams[constants.NIC_MODE]
9134 link = nicparams[constants.NIC_LINK]
9135 args['nics'].append((ip, mac, mode, link))
9136 if constants.DDM_ADD in nic_override:
9137 ip = nic_override[constants.DDM_ADD].get('ip', None)
9138 mac = nic_override[constants.DDM_ADD]['mac']
9139 nicparams = self.nic_pnew[constants.DDM_ADD]
9140 mode = nicparams[constants.NIC_MODE]
9141 link = nicparams[constants.NIC_LINK]
9142 args['nics'].append((ip, mac, mode, link))
9143 elif constants.DDM_REMOVE in nic_override:
9144 del args['nics'][-1]
9146 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9147 if self.op.disk_template:
9148 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9149 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9152 def CheckPrereq(self):
9153 """Check prerequisites.
9155 This only checks the instance list against the existing names.
9157 """
9158 # checking the new params on the primary/secondary nodes
9160 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9161 cluster = self.cluster = self.cfg.GetClusterInfo()
9162 assert self.instance is not None, \
9163 "Cannot retrieve locked instance %s" % self.op.instance_name
9164 pnode = instance.primary_node
9165 nodelist = list(instance.all_nodes)
9168 if self.op.os_name and not self.op.force:
9169 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9170 self.op.force_variant)
9171 instance_os = self.op.os_name
9173 instance_os = instance.os
9175 if self.op.disk_template:
9176 if instance.disk_template == self.op.disk_template:
9177 raise errors.OpPrereqError("Instance already has disk template %s" %
9178 instance.disk_template, errors.ECODE_INVAL)
9180 if (instance.disk_template,
9181 self.op.disk_template) not in self._DISK_CONVERSIONS:
9182 raise errors.OpPrereqError("Unsupported disk template conversion from"
9183 " %s to %s" % (instance.disk_template,
9184 self.op.disk_template),
9185 errors.ECODE_INVAL)
9186 _CheckInstanceDown(self, instance, "cannot change disk template")
9187 if self.op.disk_template in constants.DTS_NET_MIRROR:
9188 if self.op.remote_node == pnode:
9189 raise errors.OpPrereqError("Given new secondary node %s is the same"
9190 " as the primary node of the instance" %
9191 self.op.remote_node, errors.ECODE_STATE)
9192 _CheckNodeOnline(self, self.op.remote_node)
9193 _CheckNodeNotDrained(self, self.op.remote_node)
9194 disks = [{"size": d.size} for d in instance.disks]
9195 required = _ComputeDiskSize(self.op.disk_template, disks)
9196 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
9198 # hvparams processing
9199 if self.op.hvparams:
9200 hv_type = instance.hypervisor
9201 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9202 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9203 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9206 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9207 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9208 self.hv_new = hv_new # the new actual values
9209 self.hv_inst = i_hvdict # the new dict (without defaults)
9210 else:
9211 self.hv_new = self.hv_inst = {}
9213 # beparams processing
9214 if self.op.beparams:
9215 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9216 use_default_values=True)
9217 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9218 be_new = cluster.SimpleFillBE(i_bedict)
9219 self.be_new = be_new # the new actual values
9220 self.be_inst = i_bedict # the new dict (without defaults)
9221 else:
9222 self.be_new = self.be_inst = {}
9224 # osparams processing
9225 if self.op.osparams:
9226 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9227 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9228 self.os_inst = i_osdict # the new dict (without defaults)
9229 else:
9230 self.os_inst = {}
9232 self.warn = []
9234 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9235 mem_check_list = [pnode]
9236 if be_new[constants.BE_AUTO_BALANCE]:
9237 # either we changed auto_balance to yes or it was from before
9238 mem_check_list.extend(instance.secondary_nodes)
9239 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9240 instance.hypervisor)
9241 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
9242 instance.hypervisor)
9243 pninfo = nodeinfo[pnode]
9244 msg = pninfo.fail_msg
9245 if msg:
9246 # Assume the primary node is unreachable and go ahead
9247 self.warn.append("Can't get info from primary node %s: %s" %
9248 (pnode, msg))
9249 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9250 self.warn.append("Node data from primary node %s doesn't contain"
9251 " free memory information" % pnode)
9252 elif instance_info.fail_msg:
9253 self.warn.append("Can't get instance runtime information: %s" %
9254 instance_info.fail_msg)
9255 else:
9256 if instance_info.payload:
9257 current_mem = int(instance_info.payload['memory'])
9258 else:
9259 # Assume instance not running
9260 # (there is a slight race condition here, but it's not very probable,
9261 # and we have no other way to check)
9262 current_mem = 0
9263 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9264 pninfo.payload['memory_free'])
9265 if miss_mem > 0:
9266 raise errors.OpPrereqError("This change will prevent the instance"
9267 " from starting, due to %d MB of memory"
9268 " missing on its primary node" % miss_mem,
9271 if be_new[constants.BE_AUTO_BALANCE]:
9272 for node, nres in nodeinfo.items():
9273 if node not in instance.secondary_nodes:
9274 continue
9275 msg = nres.fail_msg
9276 if msg:
9277 self.warn.append("Can't get info from secondary node %s: %s" %
9278 (node, msg))
9279 elif not isinstance(nres.payload.get('memory_free', None), int):
9280 self.warn.append("Secondary node %s didn't return free"
9281 " memory information" % node)
9282 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9283 self.warn.append("Not enough memory to failover instance to"
9284 " secondary node %s" % node)
9289 for nic_op, nic_dict in self.op.nics:
9290 if nic_op == constants.DDM_REMOVE:
9291 if not instance.nics:
9292 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9295 if nic_op != constants.DDM_ADD:
9297 if not instance.nics:
9298 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9299 " no NICs" % nic_op,
9301 if nic_op < 0 or nic_op >= len(instance.nics):
9302 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9304 (nic_op, len(instance.nics) - 1),
9306 old_nic_params = instance.nics[nic_op].nicparams
9307 old_nic_ip = instance.nics[nic_op].ip
9308 else:
9309 old_nic_params = cluster.SimpleFillNIC({})
9310 old_nic_ip = None
9312 update_params_dict = dict([(key, nic_dict[key])
9313 for key in constants.NICS_PARAMETERS
9314 if key in nic_dict])
9316 if 'bridge' in nic_dict:
9317 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9319 new_nic_params = _GetUpdatedParams(old_nic_params,
9321 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9322 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9323 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9324 self.nic_pinst[nic_op] = new_nic_params
9325 self.nic_pnew[nic_op] = new_filled_nic_params
9326 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9328 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9329 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9330 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9332 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9334 self.warn.append(msg)
9336 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9337 if new_nic_mode == constants.NIC_MODE_ROUTED:
9338 if 'ip' in nic_dict:
9339 nic_ip = nic_dict['ip']
9343 raise errors.OpPrereqError('Cannot set the nic ip to None'
9344 ' on a routed nic', errors.ECODE_INVAL)
9345 if 'mac' in nic_dict:
9346 nic_mac = nic_dict['mac']
9348 raise errors.OpPrereqError('Cannot set the nic mac to None',
9350 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9351 # otherwise generate the mac
9352 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9354 # or validate/reserve the current one
9356 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9357 except errors.ReservationError:
9358 raise errors.OpPrereqError("MAC address %s already in use"
9359 " in cluster" % nic_mac,
9360 errors.ECODE_NOTUNIQUE)
9363 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9364 raise errors.OpPrereqError("Disk operations not supported for"
9365 " diskless instances",
9367 for disk_op, _ in self.op.disks:
9368 if disk_op == constants.DDM_REMOVE:
9369 if len(instance.disks) == 1:
9370 raise errors.OpPrereqError("Cannot remove the last disk of"
9371 " an instance", errors.ECODE_INVAL)
9372 _CheckInstanceDown(self, instance, "cannot remove disks")
9374 if (disk_op == constants.DDM_ADD and
9375 len(instance.disks) >= constants.MAX_DISKS):
9376 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9377 " add more" % constants.MAX_DISKS,
9379 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9381 if disk_op < 0 or disk_op >= len(instance.disks):
9382 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9383 " are 0 to %d" %
9384 (disk_op, len(instance.disks)),
9389 def _ConvertPlainToDrbd(self, feedback_fn):
9390 """Converts an instance from plain to drbd.
9393 feedback_fn("Converting template to drbd")
9394 instance = self.instance
9395 pnode = instance.primary_node
9396 snode = self.op.remote_node
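# Descriptive note on the conversion below: the new DRBD disk tree is
# generated first, then (1) the missing data/meta volumes are created on
# both nodes, (2) the original LVs are renamed to the names the new DRBD
# children expect, and (3) the DRBD devices are assembled on top and the
# initial sync is awaited.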
9398 # create a fake disk info for _GenerateDiskTemplate
9399 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9400 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9401 instance.name, pnode, [snode],
9402 disk_info, None, None, 0)
9403 info = _GetInstanceInfoText(instance)
9404 feedback_fn("Creating additional volumes...")
9405 # first, create the missing data and meta devices
9406 for disk in new_disks:
9407 # unfortunately this is... not too nice
9408 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9410 for child in disk.children:
9411 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9412 # at this stage, all new LVs have been created, we can rename the
9413 # old ones
9414 feedback_fn("Renaming original volumes...")
9415 rename_list = [(o, n.children[0].logical_id)
9416 for (o, n) in zip(instance.disks, new_disks)]
9417 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9418 result.Raise("Failed to rename original LVs")
9420 feedback_fn("Initializing DRBD devices...")
9421 # all child devices are in place, we can now create the DRBD devices
9422 for disk in new_disks:
9423 for node in [pnode, snode]:
9424 f_create = node == pnode
9425 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9427 # at this point, the instance has been modified
9428 instance.disk_template = constants.DT_DRBD8
9429 instance.disks = new_disks
9430 self.cfg.Update(instance, feedback_fn)
9432 # disks are created, waiting for sync
9433 disk_abort = not _WaitForSync(self, instance)
9435 raise errors.OpExecError("There are some degraded disks for"
9436 " this instance, please cleanup manually")
9438 def _ConvertDrbdToPlain(self, feedback_fn):
9439 """Converts an instance from drbd to plain.
9442 instance = self.instance
9443 assert len(instance.secondary_nodes) == 1
9444 pnode = instance.primary_node
9445 snode = instance.secondary_nodes[0]
9446 feedback_fn("Converting template to plain")
9448 old_disks = instance.disks
9449 new_disks = [d.children[0] for d in old_disks]
9451 # copy over size and mode
9452 for parent, child in zip(old_disks, new_disks):
9453 child.size = parent.size
9454 child.mode = parent.mode
9456 # update instance structure
9457 instance.disks = new_disks
9458 instance.disk_template = constants.DT_PLAIN
9459 self.cfg.Update(instance, feedback_fn)
9461 feedback_fn("Removing volumes on the secondary node...")
9462 for disk in old_disks:
9463 self.cfg.SetDiskID(disk, snode)
9464 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9466 self.LogWarning("Could not remove block device %s on node %s,"
9467 " continuing anyway: %s", disk.iv_name, snode, msg)
9469 feedback_fn("Removing unneeded volumes on the primary node...")
9470 for idx, disk in enumerate(old_disks):
9471 meta = disk.children[1]
9472 self.cfg.SetDiskID(meta, pnode)
9473 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9475 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9476 " continuing anyway: %s", idx, pnode, msg)
9479 def Exec(self, feedback_fn):
9480 """Modifies an instance.
9482 All parameters take effect only at the next restart of the instance.
9485 # Process here the warnings from CheckPrereq, as we don't have a
9486 # feedback_fn there.
9487 for warn in self.warn:
9488 feedback_fn("WARNING: %s" % warn)
9491 instance = self.instance
9493 for disk_op, disk_dict in self.op.disks:
9494 if disk_op == constants.DDM_REMOVE:
9495 # remove the last disk
9496 device = instance.disks.pop()
9497 device_idx = len(instance.disks)
9498 for node, disk in device.ComputeNodeTree(instance.primary_node):
9499 self.cfg.SetDiskID(disk, node)
9500 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9502 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9503 " continuing anyway", device_idx, node, msg)
9504 result.append(("disk/%d" % device_idx, "remove"))
9505 elif disk_op == constants.DDM_ADD:
9507 if instance.disk_template == constants.DT_FILE:
9508 file_driver, file_path = instance.disks[0].logical_id
9509 file_path = os.path.dirname(file_path)
9511 file_driver = file_path = None
9512 disk_idx_base = len(instance.disks)
9513 new_disk = _GenerateDiskTemplate(self,
9514 instance.disk_template,
9515 instance.name, instance.primary_node,
9516 instance.secondary_nodes,
9521 instance.disks.append(new_disk)
9522 info = _GetInstanceInfoText(instance)
9524 logging.info("Creating volume %s for instance %s",
9525 new_disk.iv_name, instance.name)
9526 # Note: this needs to be kept in sync with _CreateDisks
9528 for node in instance.all_nodes:
9529 f_create = node == instance.primary_node
9531 _CreateBlockDev(self, node, instance, new_disk,
9532 f_create, info, f_create)
9533 except errors.OpExecError, err:
9534 self.LogWarning("Failed to create volume %s (%s) on"
9535 " node %s: %s",
9536 new_disk.iv_name, new_disk, node, err)
9537 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9538 (new_disk.size, new_disk.mode)))
9540 # change a given disk
9541 instance.disks[disk_op].mode = disk_dict['mode']
9542 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9544 if self.op.disk_template:
9545 r_shut = _ShutdownInstanceDisks(self, instance)
9547 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9548 " proceed with disk template conversion")
9549 mode = (instance.disk_template, self.op.disk_template)
9551 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9553 self.cfg.ReleaseDRBDMinors(instance.name)
9555 result.append(("disk_template", self.op.disk_template))
9558 for nic_op, nic_dict in self.op.nics:
9559 if nic_op == constants.DDM_REMOVE:
9560 # remove the last nic
9561 del instance.nics[-1]
9562 result.append(("nic.%d" % len(instance.nics), "remove"))
9563 elif nic_op == constants.DDM_ADD:
9564 # mac and bridge should be set by now
9565 mac = nic_dict['mac']
9566 ip = nic_dict.get('ip', None)
9567 nicparams = self.nic_pinst[constants.DDM_ADD]
9568 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9569 instance.nics.append(new_nic)
9570 result.append(("nic.%d" % (len(instance.nics) - 1),
9571 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9572 (new_nic.mac, new_nic.ip,
9573 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9574 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9577 for key in 'mac', 'ip':
9578 if key in nic_dict:
9579 setattr(instance.nics[nic_op], key, nic_dict[key])
9580 if nic_op in self.nic_pinst:
9581 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9582 for key, val in nic_dict.iteritems():
9583 result.append(("nic.%s/%d" % (key, nic_op), val))
9586 if self.op.hvparams:
9587 instance.hvparams = self.hv_inst
9588 for key, val in self.op.hvparams.iteritems():
9589 result.append(("hv/%s" % key, val))
9592 if self.op.beparams:
9593 instance.beparams = self.be_inst
9594 for key, val in self.op.beparams.iteritems():
9595 result.append(("be/%s" % key, val))
9599 instance.os = self.op.os_name
9602 if self.op.osparams:
9603 instance.osparams = self.os_inst
9604 for key, val in self.op.osparams.iteritems():
9605 result.append(("os/%s" % key, val))
9607 self.cfg.Update(instance, feedback_fn)
9611 _DISK_CONVERSIONS = {
9612 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9613 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9614 }
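# Illustrative sketch (not part of the LU): the dispatch-by-tuple pattern
# used by _DISK_CONVERSIONS above, where the (current, requested) disk
# template pair selects the converter and an unknown pair is rejected.
def _ExampleDispatchConversion(conversions, src, dst):
  """Looks up and runs a converter for the (src, dst) template pair."""
  try:
    fn = conversions[(src, dst)]
  except KeyError:
    raise ValueError("conversion %s -> %s not supported" % (src, dst))
  return fn()

# e.g. _ExampleDispatchConversion({("plain", "drbd8"): lambda: "ok"},
#                                 "plain", "drbd8") == "ok"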
9617 class LUQueryExports(NoHooksLU):
9618 """Query the exports list
9622 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9623 ("use_locking", False, ht.TBool),
9627 def ExpandNames(self):
9628 self.needed_locks = {}
9629 self.share_locks[locking.LEVEL_NODE] = 1
9630 if not self.op.nodes:
9631 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9633 self.needed_locks[locking.LEVEL_NODE] = \
9634 _GetWantedNodes(self, self.op.nodes)
9636 def Exec(self, feedback_fn):
9637 """Compute the list of all the exported system images.
9640 @return: a dictionary with the structure node->(export-list)
9641 where export-list is a list of the instances exported on
9645 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9646 rpcresult = self.rpc.call_export_list(self.nodes)
9647 result = {}
9648 for node in rpcresult:
9649 if rpcresult[node].fail_msg:
9650 result[node] = False
9651 else:
9652 result[node] = rpcresult[node].payload
9654 return result
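# Minimal sketch of the aggregation done in Exec above, assuming objects
# with .fail_msg/.payload attributes as returned by the RPC layer (the
# names below are illustrative stand-ins, not the real rpc classes):
def _ExampleCollectExports(rpcresult):
  """Reduces node -> rpc result to node -> export list (or False)."""
  result = {}
  for node, res in rpcresult.items():
    # a node that failed to answer is reported as False, not omitted
    result[node] = False if res.fail_msg else res.payload
  return result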
9657 class LUPrepareExport(NoHooksLU):
9658 """Prepares an instance for an export and returns useful information.
9663 ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9667 def ExpandNames(self):
9668 self._ExpandAndLockInstance()
9670 def CheckPrereq(self):
9671 """Check prerequisites.
9674 instance_name = self.op.instance_name
9676 self.instance = self.cfg.GetInstanceInfo(instance_name)
9677 assert self.instance is not None, \
9678 "Cannot retrieve locked instance %s" % self.op.instance_name
9679 _CheckNodeOnline(self, self.instance.primary_node)
9681 self._cds = _GetClusterDomainSecret()
9683 def Exec(self, feedback_fn):
9684 """Prepares an instance for an export.
9687 instance = self.instance
9689 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9690 salt = utils.GenerateSecret(8)
9692 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9693 result = self.rpc.call_x509_cert_create(instance.primary_node,
9694 constants.RIE_CERT_VALIDITY)
9695 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9697 (name, cert_pem) = result.payload
9699 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9703 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9704 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9706 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9712 class LUExportInstance(LogicalUnit):
9713 """Export an instance to an image in the cluster.
9716 HPATH = "instance-export"
9717 HTYPE = constants.HTYPE_INSTANCE
9720 ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9721 ("shutdown", True, ht.TBool),
9723 ("remove_instance", False, ht.TBool),
9724 ("ignore_remove_failures", False, ht.TBool),
9725 ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9726 ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9727 ("destination_x509_ca", None, ht.TMaybeString),
9731 def CheckArguments(self):
9732 """Check the arguments.
9735 self.x509_key_name = self.op.x509_key_name
9736 self.dest_x509_ca_pem = self.op.destination_x509_ca
9738 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9739 if not self.x509_key_name:
9740 raise errors.OpPrereqError("Missing X509 key name for encryption",
9743 if not self.dest_x509_ca_pem:
9744 raise errors.OpPrereqError("Missing destination X509 CA",
9747 def ExpandNames(self):
9748 self._ExpandAndLockInstance()
9750 # Lock all nodes for local exports
9751 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9752 # FIXME: lock only instance primary and destination node
9754 # Sad but true, for now we have to lock all nodes, as we don't know where
9755 # the previous export might be, and in this LU we search for it and
9756 # remove it from its current node. In the future we could fix this by:
9757 # - making a tasklet to search (share-lock all), then create the
9758 # new one, then one to remove, after
9759 # - removing the removal operation altogether
9760 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9762 def DeclareLocks(self, level):
9763 """Last minute lock declaration."""
9764 # All nodes are locked anyway, so nothing to do here.
9766 def BuildHooksEnv(self):
9769 This will run on the master, primary node and target node.
9773 "EXPORT_MODE": self.op.mode,
9774 "EXPORT_NODE": self.op.target_node,
9775 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9776 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9777 # TODO: Generic function for boolean env variables
9778 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9781 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9783 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9785 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9786 nl.append(self.op.target_node)
9790 def CheckPrereq(self):
9791 """Check prerequisites.
9793 This checks that the instance and node names are valid.
9796 instance_name = self.op.instance_name
9798 self.instance = self.cfg.GetInstanceInfo(instance_name)
9799 assert self.instance is not None, \
9800 "Cannot retrieve locked instance %s" % self.op.instance_name
9801 _CheckNodeOnline(self, self.instance.primary_node)
9803 if (self.op.remove_instance and self.instance.admin_up and
9804 not self.op.shutdown):
9805 raise errors.OpPrereqError("Can not remove instance without shutting it"
9808 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9809 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9810 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9811 assert self.dst_node is not None
9813 _CheckNodeOnline(self, self.dst_node.name)
9814 _CheckNodeNotDrained(self, self.dst_node.name)
9817 self.dest_disk_info = None
9818 self.dest_x509_ca = None
9820 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9821 self.dst_node = None
9823 if len(self.op.target_node) != len(self.instance.disks):
9824 raise errors.OpPrereqError(("Received destination information for %s"
9825 " disks, but instance %s has %s disks") %
9826 (len(self.op.target_node), instance_name,
9827 len(self.instance.disks)),
9830 cds = _GetClusterDomainSecret()
9832 # Check X509 key name
9834 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9835 except (TypeError, ValueError), err:
9836 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9838 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9839 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9842 # Load and verify CA
9844 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9845 except OpenSSL.crypto.Error, err:
9846 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9847 (err, ), errors.ECODE_INVAL)
9849 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9850 if errcode is not None:
9851 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9852 (msg, ), errors.ECODE_INVAL)
9854 self.dest_x509_ca = cert
9856 # Verify target information
9857 disk_info = []
9858 for idx, disk_data in enumerate(self.op.target_node):
9860 (host, port, magic) = \
9861 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9862 except errors.GenericError, err:
9863 raise errors.OpPrereqError("Target info for disk %s: %s" %
9864 (idx, err), errors.ECODE_INVAL)
9866 disk_info.append((host, port, magic))
9868 assert len(disk_info) == len(self.op.target_node)
9869 self.dest_disk_info = disk_info
9872 raise errors.ProgrammerError("Unhandled export mode %r" %
9875 # instance disk type verification
9876 # TODO: Implement export support for file-based disks
9877 for disk in self.instance.disks:
9878 if disk.dev_type == constants.LD_FILE:
9879 raise errors.OpPrereqError("Export not supported for instances with"
9880 " file-based disks", errors.ECODE_INVAL)
9882 def _CleanupExports(self, feedback_fn):
9883 """Removes exports of current instance from all other nodes.
9885 If an instance in a cluster with nodes A..D was exported to node C, its
9886 exports will be removed from the nodes A, B and D.
9889 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9891 nodelist = self.cfg.GetNodeList()
9892 nodelist.remove(self.dst_node.name)
9894 # on one-node clusters nodelist will be empty after the removal
9895 # if we proceed, the backup would be removed because OpQueryExports
9896 # substitutes an empty list with the full cluster node list.
9897 iname = self.instance.name
9899 feedback_fn("Removing old exports for instance %s" % iname)
9900 exportlist = self.rpc.call_export_list(nodelist)
9901 for node in exportlist:
9902 if exportlist[node].fail_msg:
9904 if iname in exportlist[node].payload:
9905 msg = self.rpc.call_export_remove(node, iname).fail_msg
9907 self.LogWarning("Could not remove older export for instance %s"
9908 " on node %s: %s", iname, node, msg)
9910 def Exec(self, feedback_fn):
9911 """Export an instance to an image in the cluster.
9914 assert self.op.mode in constants.EXPORT_MODES
9916 instance = self.instance
9917 src_node = instance.primary_node
9919 if self.op.shutdown:
9920 # shutdown the instance, but not the disks
9921 feedback_fn("Shutting down instance %s" % instance.name)
9922 result = self.rpc.call_instance_shutdown(src_node, instance,
9923 self.op.shutdown_timeout)
9924 # TODO: Maybe ignore failures if ignore_remove_failures is set
9925 result.Raise("Could not shutdown instance %s on"
9926 " node %s" % (instance.name, src_node))
9928 # set the disks ID correctly since call_instance_start needs the
9929 # correct drbd minor to create the symlinks
9930 for disk in instance.disks:
9931 self.cfg.SetDiskID(disk, src_node)
9933 activate_disks = (not instance.admin_up)
9936 # Activate the instance disks if we're exporting a stopped instance
9937 feedback_fn("Activating disks for %s" % instance.name)
9938 _StartInstanceDisks(self, instance, None)
9941 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9944 helper.CreateSnapshots()
9946 if (self.op.shutdown and instance.admin_up and
9947 not self.op.remove_instance):
9948 assert not activate_disks
9949 feedback_fn("Starting instance %s" % instance.name)
9950 result = self.rpc.call_instance_start(src_node, instance, None, None)
9951 msg = result.fail_msg
9953 feedback_fn("Failed to start instance: %s" % msg)
9954 _ShutdownInstanceDisks(self, instance)
9955 raise errors.OpExecError("Could not start instance: %s" % msg)
9957 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9958 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9959 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9960 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9961 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9963 (key_name, _, _) = self.x509_key_name
9965 dest_ca_pem = \
9966 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9967 self.dest_x509_ca)
9969 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9970 key_name, dest_ca_pem,
9975 # Check for backwards compatibility
9976 assert len(dresults) == len(instance.disks)
9977 assert compat.all(isinstance(i, bool) for i in dresults), \
9978 "Not all results are boolean: %r" % dresults
9982 feedback_fn("Deactivating disks for %s" % instance.name)
9983 _ShutdownInstanceDisks(self, instance)
9985 if not (compat.all(dresults) and fin_resu):
9986 failures = []
9987 if not fin_resu:
9988 failures.append("export finalization")
9989 if not compat.all(dresults):
9990 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9991 if not dsk)
9992 failures.append("disk export: disk(s) %s" % fdsk)
9994 raise errors.OpExecError("Export failed, errors in %s" %
9995 utils.CommaJoin(failures))
9997 # At this point, the export was successful, we can cleanup/finish
9999 # Remove instance if requested
10000 if self.op.remove_instance:
10001 feedback_fn("Removing instance %s" % instance.name)
10002 _RemoveInstance(self, feedback_fn, instance,
10003 self.op.ignore_remove_failures)
10005 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10006 self._CleanupExports(feedback_fn)
10008 return fin_resu, dresults
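# Sketch of the failure aggregation at the end of Exec above: the export
# counts as successful only if finalization succeeded and every per-disk
# result is true; failing disk indices are collected for the error text.
def _ExampleSummarizeExport(fin_resu, dresults):
  """Returns a list of failure descriptions; empty means success."""
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  if not all(dresults):
    fdsk = ", ".join(str(idx) for (idx, dsk) in enumerate(dresults)
                     if not dsk)
    failures.append("disk export: disk(s) %s" % fdsk)
  return failures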
10011 class LURemoveExport(NoHooksLU):
10012 """Remove exports related to the named instance.
10020 def ExpandNames(self):
10021 self.needed_locks = {}
10022 # We need all nodes to be locked in order for RemoveExport to work, but we
10023 # don't need to lock the instance itself, as nothing will happen to it (and
10024 # we can remove exports also for a removed instance)
10025 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10027 def Exec(self, feedback_fn):
10028 """Remove any export.
10031 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10032 # If the instance was not found we'll try with the name that was passed in.
10033 # This will only work if it was an FQDN, though.
10034 fqdn_warn = False
10035 if not instance_name:
10036 fqdn_warn = True
10037 instance_name = self.op.instance_name
10039 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10040 exportlist = self.rpc.call_export_list(locked_nodes)
10041 found = False
10042 for node in exportlist:
10043 msg = exportlist[node].fail_msg
10045 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10047 if instance_name in exportlist[node].payload:
10048 found = True
10049 result = self.rpc.call_export_remove(node, instance_name)
10050 msg = result.fail_msg
10052 logging.error("Could not remove export for instance %s"
10053 " on node %s: %s", instance_name, node, msg)
10055 if fqdn_warn and not found:
10056 feedback_fn("Export not found. If trying to remove an export belonging"
10057 " to a deleted instance please use its Fully Qualified"
10061 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10062 """Generic tags LU.
10064 This is an abstract class which is the parent of all the other tags LUs.
10068 def ExpandNames(self):
10069 self.needed_locks = {}
10070 if self.op.kind == constants.TAG_NODE:
10071 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10072 self.needed_locks[locking.LEVEL_NODE] = self.op.name
10073 elif self.op.kind == constants.TAG_INSTANCE:
10074 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10075 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10077 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10078 # not possible to acquire the BGL based on opcode parameters)
10080 def CheckPrereq(self):
10081 """Check prerequisites.
10084 if self.op.kind == constants.TAG_CLUSTER:
10085 self.target = self.cfg.GetClusterInfo()
10086 elif self.op.kind == constants.TAG_NODE:
10087 self.target = self.cfg.GetNodeInfo(self.op.name)
10088 elif self.op.kind == constants.TAG_INSTANCE:
10089 self.target = self.cfg.GetInstanceInfo(self.op.name)
10091 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10092 str(self.op.kind), errors.ECODE_INVAL)
10095 class LUGetTags(TagsLU):
10096 """Returns the tags of a given object.
10100 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10101 # Name is only meaningful for nodes and instances
10102 ("name", ht.NoDefault, ht.TMaybeString),
10106 def ExpandNames(self):
10107 TagsLU.ExpandNames(self)
10109 # Share locks as this is only a read operation
10110 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10112 def Exec(self, feedback_fn):
10113 """Returns the tag list.
10116 return list(self.target.GetTags())
10119 class LUSearchTags(NoHooksLU):
10120 """Searches the tags for a given pattern.
10124 ("pattern", ht.NoDefault, ht.TNonEmptyString),
10128 def ExpandNames(self):
10129 self.needed_locks = {}
10131 def CheckPrereq(self):
10132 """Check prerequisites.
10134 This checks the pattern passed for validity by compiling it.
10138 self.re = re.compile(self.op.pattern)
10139 except re.error, err:
10140 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10141 (self.op.pattern, err), errors.ECODE_INVAL)
10143 def Exec(self, feedback_fn):
10144 """Returns the tag list.
10146 cfg = self.cfg
10148 tgts = [("/cluster", cfg.GetClusterInfo())]
10149 ilist = cfg.GetAllInstancesInfo().values()
10150 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10151 nlist = cfg.GetAllNodesInfo().values()
10152 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10154 for path, target in tgts:
10155 for tag in target.GetTags():
10156 if self.re.search(tag):
10157 results.append((path, tag))
10159 return results
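# Self-contained sketch of the tag search above: compile the pattern once,
# then walk every (path, tags) target and collect matching (path, tag)
# pairs ("targets" here is an illustrative stand-in for the config walk):
import re

def _ExampleSearchTags(pattern, targets):
  """targets: iterable of (path, tags) pairs; returns [(path, tag), ...]."""
  rx = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in targets
          for tag in tags
          if rx.search(tag)]

# e.g. _ExampleSearchTags("^web", [("/instances/i1", ["webserver", "db"])])
# == [("/instances/i1", "webserver")]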
10161 class LUAddTags(TagsLU):
10162 """Sets a tag on a given object.
10166 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10167 # Name is only meaningful for nodes and instances
10168 ("name", ht.NoDefault, ht.TMaybeString),
10169 ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10173 def CheckPrereq(self):
10174 """Check prerequisites.
10176 This checks the type and length of the tag name and value.
10179 TagsLU.CheckPrereq(self)
10180 for tag in self.op.tags:
10181 objects.TaggableObject.ValidateTag(tag)
10183 def Exec(self, feedback_fn):
10188 for tag in self.op.tags:
10189 self.target.AddTag(tag)
10190 except errors.TagError, err:
10191 raise errors.OpExecError("Error while setting tag: %s" % str(err))
10192 self.cfg.Update(self.target, feedback_fn)
10195 class LUDelTags(TagsLU):
10196 """Delete a list of tags from a given object.
10200 ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10201 # Name is only meaningful for nodes and instances
10202 ("name", ht.NoDefault, ht.TMaybeString),
10203 ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10207 def CheckPrereq(self):
10208 """Check prerequisites.
10210 This checks that we have the given tag.
10213 TagsLU.CheckPrereq(self)
10214 for tag in self.op.tags:
10215 objects.TaggableObject.ValidateTag(tag)
10216 del_tags = frozenset(self.op.tags)
10217 cur_tags = self.target.GetTags()
10219 diff_tags = del_tags - cur_tags
10221 diff_names = ("'%s'" % i for i in sorted(diff_tags))
10222 raise errors.OpPrereqError("Tag(s) %s not found" %
10223 (utils.CommaJoin(diff_names), ),
10224 errors.ECODE_NOENT)
10226 def Exec(self, feedback_fn):
10227 """Remove the tag from the object.
10230 for tag in self.op.tags:
10231 self.target.RemoveTag(tag)
10232 self.cfg.Update(self.target, feedback_fn)
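# Sketch of the missing-tag check in CheckPrereq above: the set difference
# between the requested deletions and the current tags yields exactly the
# tags that cannot be removed.
def _ExampleMissingTags(del_tags, cur_tags):
  """Returns the sorted tags in del_tags that are absent from cur_tags."""
  diff_tags = frozenset(del_tags) - frozenset(cur_tags)
  return sorted(diff_tags)  # empty when every requested tag exists

# e.g. _ExampleMissingTags(["a", "x"], ["a", "b"]) == ["x"]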
10235 class LUTestDelay(NoHooksLU):
10236 """Sleep for a specified amount of time.
10238 This LU sleeps on the master and/or nodes for a specified amount of
10239 time.
10243 ("duration", ht.NoDefault, ht.TFloat),
10244 ("on_master", True, ht.TBool),
10245 ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10246 ("repeat", 0, ht.TPositiveInt)
10250 def ExpandNames(self):
10251 """Expand names and set required locks.
10253 This expands the node list, if any.
10256 self.needed_locks = {}
10257 if self.op.on_nodes:
10258 # _GetWantedNodes can be used here, but is not always appropriate to use
10259 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10260 # more information.
10261 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10262 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10264 def _TestDelay(self):
10265 """Do the actual sleep.
10268 if self.op.on_master:
10269 if not utils.TestDelay(self.op.duration):
10270 raise errors.OpExecError("Error during master delay test")
10271 if self.op.on_nodes:
10272 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10273 for node, node_result in result.items():
10274 node_result.Raise("Failure during rpc call to node %s" % node)
10276 def Exec(self, feedback_fn):
10277 """Execute the test delay opcode, with the wanted repetitions.
10280 if self.op.repeat == 0:
10281 self._TestDelay()
10282 else:
10283 top_value = self.op.repeat - 1
10284 for i in range(self.op.repeat):
10285 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10286 self._TestDelay()
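# Sketch of the repeat semantics implemented in Exec above: repeat == 0
# still runs the delay exactly once, while repeat == N runs it N times
# and logs a zero-based iteration counter against N - 1.
def _ExampleRunRepeated(fn, repeat, log=None):
  if repeat == 0:
    fn()
  else:
    for i in range(repeat):
      if log:
        log("iteration %d/%d" % (i, repeat - 1))
      fn()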
10289 class LUTestJobqueue(NoHooksLU):
10290 """Utility LU to test some aspects of the job queue.
10294 ("notify_waitlock", False, ht.TBool),
10295 ("notify_exec", False, ht.TBool),
10296 ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10297 ("fail", False, ht.TBool),
10301 # Must be lower than default timeout for WaitForJobChange to see whether it
10302 # notices changed jobs
10303 _CLIENT_CONNECT_TIMEOUT = 20.0
10304 _CLIENT_CONFIRM_TIMEOUT = 60.0
10307 def _NotifyUsingSocket(cls, cb, errcls):
10308 """Opens a Unix socket and waits for another program to connect.
10311 @param cb: Callback to send socket name to client
10312 @type errcls: class
10313 @param errcls: Exception class to use for errors
10316 # Using a temporary directory as there's no easy way to create temporary
10317 # sockets without writing a custom loop around tempfile.mktemp and
10319 tmpdir = tempfile.mkdtemp()
10321 tmpsock = utils.PathJoin(tmpdir, "sock")
10323 logging.debug("Creating temporary socket at %s", tmpsock)
10324 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10329 # Send details to client
10332 # Wait for client to connect before continuing
10333 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10335 (conn, _) = sock.accept()
10336 except socket.error, err:
10337 raise errcls("Client didn't connect in time (%s)" % err)
10341 # Remove as soon as client is connected
10342 shutil.rmtree(tmpdir)
10344 # Wait for client to close
10347 # pylint: disable-msg=E1101
10348 # Instance of '_socketobject' has no ... member
10349 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10351 except socket.error, err:
10352 raise errcls("Client failed to confirm notification (%s)" % err)
10356 def _SendNotification(self, test, arg, sockname):
10357 """Sends a notification to the client.
10360 @param test: Test name
10361 @param arg: Test argument (depends on test)
10362 @type sockname: string
10363 @param sockname: Socket path
10366 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10368 def _Notify(self, prereq, test, arg):
10369 """Notifies the client of a test.
10372 @param prereq: Whether this is a prereq-phase test
10374 @param test: Test name
10375 @param arg: Test argument (depends on test)
10379 errcls = errors.OpPrereqError
10381 errcls = errors.OpExecError
10383 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10387 def CheckArguments(self):
10388 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10389 self.expandnames_calls = 0
10391 def ExpandNames(self):
10392 checkargs_calls = getattr(self, "checkargs_calls", 0)
10393 if checkargs_calls < 1:
10394 raise errors.ProgrammerError("CheckArguments was not called")
10396 self.expandnames_calls += 1
10398 if self.op.notify_waitlock:
10399 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10401 self.LogInfo("Expanding names")
10403 # Get lock on master node (just to get a lock, not for a particular reason)
10404 self.needed_locks = {
10405 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10408 def Exec(self, feedback_fn):
10409 if self.expandnames_calls < 1:
10410 raise errors.ProgrammerError("ExpandNames was not called")
10412 if self.op.notify_exec:
10413 self._Notify(False, constants.JQT_EXEC, None)
10415 self.LogInfo("Executing")
10417 if self.op.log_messages:
10418 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10419 for idx, msg in enumerate(self.op.log_messages):
10420 self.LogInfo("Sending log message %s", idx + 1)
10421 feedback_fn(constants.JQT_MSGPREFIX + msg)
10422 # Report how many test messages have been sent
10423 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10426 raise errors.OpExecError("Opcode failure was requested")
10431 class IAllocator(object):
10432 """IAllocator framework.
10434 An IAllocator instance has four sets of attributes:
10435 - cfg that is needed to query the cluster
10436 - input data (all members of the _KEYS class attribute are required)
10437 - four buffer attributes (in|out_data|text), that represent the
10438 input (to the external script) in text and data structure format,
10439 and the output from it, again in two formats
10440 - the result variables from the script (success, info, nodes) for
10441 easy usage
10444 # pylint: disable-msg=R0902
10445 # lots of instance attributes
10447 "name", "mem_size", "disks", "disk_template",
10448 "os", "tags", "nics", "vcpus", "hypervisor",
10451 "name", "relocate_from",
10457 def __init__(self, cfg, rpc, mode, **kwargs):
10460 # init buffer variables
10461 self.in_text = self.out_text = self.in_data = self.out_data = None
10462 # init all input fields so that pylint is happy
10464 self.mem_size = self.disks = self.disk_template = None
10465 self.os = self.tags = self.nics = self.vcpus = None
10466 self.hypervisor = None
10467 self.relocate_from = None
10469 self.evac_nodes = None
10471 self.required_nodes = None
10472 # init result fields
10473 self.success = self.info = self.result = None
10474 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10475 keyset = self._ALLO_KEYS
10476 fn = self._AddNewInstance
10477 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10478 keyset = self._RELO_KEYS
10479 fn = self._AddRelocateInstance
10480 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10481 keyset = self._EVAC_KEYS
10482 fn = self._AddEvacuateNodes
10484 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10485 " IAllocator" % self.mode)
10487 if key not in keyset:
10488 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10489 " IAllocator" % key)
10490 setattr(self, key, kwargs[key])
10493 if key not in kwargs:
10494 raise errors.ProgrammerError("Missing input parameter '%s' to"
10495 " IAllocator" % key)
10496 self._BuildInputData(fn)
10498 def _ComputeClusterData(self):
10499 """Compute the generic allocator input data.
10501 This is the data that is independent of the actual operation.
10505 cluster_info = cfg.GetClusterInfo()
10508 "version": constants.IALLOCATOR_VERSION,
10509 "cluster_name": cfg.GetClusterName(),
10510 "cluster_tags": list(cluster_info.GetTags()),
10511 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10512 # we don't have job IDs
10514 iinfo = cfg.GetAllInstancesInfo().values()
10515 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10518 node_list = cfg.GetNodeList()
10520 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10521 hypervisor_name = self.hypervisor
10522 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10523 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10524 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10525 hypervisor_name = cluster_info.enabled_hypervisors[0]
10527 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10530 self.rpc.call_all_instances_info(node_list,
10531 cluster_info.enabled_hypervisors)
10533 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10535 data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10537 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10539 self.in_data = data
10542 def _ComputeNodeGroupData(cfg):
10543 """Compute node groups data.
10547 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10548 ng[guuid] = { "name": gdata.name }
10552 def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
10553 """Compute global node data.
10557 for nname, nresult in node_data.items():
10558 # first fill in static (config-based) values
10559 ninfo = cfg.GetNodeInfo(nname)
10561 "tags": list(ninfo.GetTags()),
10562 "primary_ip": ninfo.primary_ip,
10563 "secondary_ip": ninfo.secondary_ip,
10564 "offline": ninfo.offline,
10565 "drained": ninfo.drained,
10566 "master_candidate": ninfo.master_candidate,
10567 "group": ninfo.group,
10568 "master_capable": ninfo.master_capable,
10569 "vm_capable": ninfo.vm_capable,
10572 if not (ninfo.offline or ninfo.drained):
10573 nresult.Raise("Can't get data for node %s" % nname)
10574 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10576 remote_info = nresult.payload
10578 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10579 'vg_size', 'vg_free', 'cpu_total']:
10580 if attr not in remote_info:
10581 raise errors.OpExecError("Node '%s' didn't return attribute"
10582 " '%s'" % (nname, attr))
10583 if not isinstance(remote_info[attr], int):
10584 raise errors.OpExecError("Node '%s' returned invalid value"
10586 (nname, attr, remote_info[attr]))
10587 # compute memory used by primary instances
10588 i_p_mem = i_p_up_mem = 0
10589 for iinfo, beinfo in i_list:
10590 if iinfo.primary_node == nname:
10591 i_p_mem += beinfo[constants.BE_MEMORY]
10592 if iinfo.name not in node_iinfo[nname].payload:
10595 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10596 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10597 remote_info['memory_free'] -= max(0, i_mem_diff)
10600 i_p_up_mem += beinfo[constants.BE_MEMORY]
10602 # compute memory used by instances
10604 "total_memory": remote_info['memory_total'],
10605 "reserved_memory": remote_info['memory_dom0'],
10606 "free_memory": remote_info['memory_free'],
10607 "total_disk": remote_info['vg_size'],
10608 "free_disk": remote_info['vg_free'],
10609 "total_cpus": remote_info['cpu_total'],
10610 "i_pri_memory": i_p_mem,
10611 "i_pri_up_memory": i_p_up_mem,
10613 pnr.update(pnr_dyn)
10615 node_results[nname] = pnr
10617 return node_results
10620 def _ComputeInstanceData(cluster_info, i_list):
10621 """Compute global instance data.
10625 for iinfo, beinfo in i_list:
10627 for nic in iinfo.nics:
10628 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10629 nic_dict = {"mac": nic.mac,
10631 "mode": filled_params[constants.NIC_MODE],
10632 "link": filled_params[constants.NIC_LINK],
10634 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10635 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10636 nic_data.append(nic_dict)
10638 "tags": list(iinfo.GetTags()),
10639 "admin_up": iinfo.admin_up,
10640 "vcpus": beinfo[constants.BE_VCPUS],
10641 "memory": beinfo[constants.BE_MEMORY],
10643 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10645 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10646 "disk_template": iinfo.disk_template,
10647 "hypervisor": iinfo.hypervisor,
10649 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10651 instance_data[iinfo.name] = pir
10653 return instance_data
10655 def _AddNewInstance(self):
10656 """Add new instance data to allocator structure.
10658 This in combination with _AllocatorGetClusterData will create the
10659 correct structure needed as input for the allocator.
10661 The checks for the completeness of the opcode must have already been
10665 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10667 if self.disk_template in constants.DTS_NET_MIRROR:
10668 self.required_nodes = 2
10670 self.required_nodes = 1
10673 "disk_template": self.disk_template,
10676 "vcpus": self.vcpus,
10677 "memory": self.mem_size,
10678 "disks": self.disks,
10679 "disk_space_total": disk_space,
10681 "required_nodes": self.required_nodes,
10685 def _AddRelocateInstance(self):
10686 """Add relocate instance data to allocator structure.
10688 This in combination with _IAllocatorGetClusterData will create the
10689 correct structure needed as input for the allocator.
10691 The checks for the completeness of the opcode must have already been
10695 instance = self.cfg.GetInstanceInfo(self.name)
10696 if instance is None:
10697 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10698 " IAllocator" % self.name)
10700 if instance.disk_template not in constants.DTS_NET_MIRROR:
10701 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10702 errors.ECODE_INVAL)
10704 if len(instance.secondary_nodes) != 1:
10705 raise errors.OpPrereqError("Instance has not exactly one secondary node",
10706 errors.ECODE_STATE)
10708 self.required_nodes = 1
10709 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10710 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10714 "disk_space_total": disk_space,
10715 "required_nodes": self.required_nodes,
10716 "relocate_from": self.relocate_from,
10720 def _AddEvacuateNodes(self):
10721 """Add evacuate nodes data to allocator structure.
10725 "evac_nodes": self.evac_nodes
10729 def _BuildInputData(self, fn):
10730 """Build input data structures.
10733 self._ComputeClusterData()
10735 request = fn()
10736 request["type"] = self.mode
10737 self.in_data["request"] = request
10739 self.in_text = serializer.Dump(self.in_data)
10741 def Run(self, name, validate=True, call_fn=None):
10742 """Run an instance allocator and return the results.
10745 if call_fn is None:
10746 call_fn = self.rpc.call_iallocator_runner
10748 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10749 result.Raise("Failure while running the iallocator script")
10751 self.out_text = result.payload
10753 self._ValidateResult()
10755 def _ValidateResult(self):
10756 """Process the allocator results.
10758 This will process and if successful save the result in
10759 self.out_data and the other parameters.
10763 rdict = serializer.Load(self.out_text)
10764 except Exception, err:
10765 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10767 if not isinstance(rdict, dict):
10768 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10770 # TODO: remove backwards compatibility in later versions
10771 if "nodes" in rdict and "result" not in rdict:
10772 rdict["result"] = rdict["nodes"]
10775 for key in "success", "info", "result":
10776 if key not in rdict:
10777 raise errors.OpExecError("Can't parse iallocator results:"
10778 " missing key '%s'" % key)
10779 setattr(self, key, rdict[key])
10781 if not isinstance(rdict["result"], list):
10782 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10784 self.out_data = rdict
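# Sketch of the checks performed by _ValidateResult above, using the
# stdlib json module as a stand-in for ganeti's serializer: the output
# must be a JSON object carrying "success", "info" and a list-valued
# "result"; legacy "nodes" output is mapped onto "result".
import json

def _ExampleValidateIAllocatorOutput(text):
  rdict = json.loads(text)
  if not isinstance(rdict, dict):
    raise ValueError("not a dict")
  if "nodes" in rdict and "result" not in rdict:
    rdict["result"] = rdict["nodes"]  # backwards compatibility
  for key in ("success", "info", "result"):
    if key not in rdict:
      raise ValueError("missing key '%s'" % key)
  if not isinstance(rdict["result"], list):
    raise ValueError("'result' key not a list")
  return rdict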
10787 class LUTestAllocator(NoHooksLU):
10788 """Run allocator tests.
10790 This LU runs the allocator tests
10794 ("direction", ht.NoDefault,
10795 ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10796 ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
10797 ("name", ht.NoDefault, ht.TNonEmptyString),
10798 ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
10799 ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
10800 ht.TOr(ht.TNone, ht.TNonEmptyString))))),
10801 ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
10802 ("hypervisor", None, ht.TMaybeString),
10803 ("allocator", None, ht.TMaybeString),
10804 ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10805 ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10806 ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10807 ("os", None, ht.TMaybeString),
10808 ("disk_template", None, ht.TMaybeString),
10809 ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
10812 def CheckPrereq(self):
10813 """Check prerequisites.
10815 This checks the opcode parameters depending on the direction and mode of the test.
10818 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10819 for attr in ["mem_size", "disks", "disk_template",
10820 "os", "tags", "nics", "vcpus"]:
10821 if not hasattr(self.op, attr):
10822 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10823 attr, errors.ECODE_INVAL)
10824 iname = self.cfg.ExpandInstanceName(self.op.name)
10825 if iname is not None:
10826 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10827 iname, errors.ECODE_EXISTS)
10828 if not isinstance(self.op.nics, list):
10829 raise errors.OpPrereqError("Invalid parameter 'nics'",
10830 errors.ECODE_INVAL)
10831 if not isinstance(self.op.disks, list):
10832 raise errors.OpPrereqError("Invalid parameter 'disks'",
10833 errors.ECODE_INVAL)
10834 for row in self.op.disks:
10835 if (not isinstance(row, dict) or
10836 "size" not in row or
10837 not isinstance(row["size"], int) or
10838 "mode" not in row or
10839 row["mode"] not in ['r', 'w']):
10840 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10841 " parameter", errors.ECODE_INVAL)
10842 if self.op.hypervisor is None:
10843 self.op.hypervisor = self.cfg.GetHypervisorType()
10844 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10845 fname = _ExpandInstanceName(self.cfg, self.op.name)
10846 self.op.name = fname
10847 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10848 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10849 if not hasattr(self.op, "evac_nodes"):
10850 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10851 " opcode input", errors.ECODE_INVAL)
10853 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10854 self.op.mode, errors.ECODE_INVAL)
10856 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10857 if self.op.allocator is None:
10858 raise errors.OpPrereqError("Missing allocator name",
10859 errors.ECODE_INVAL)
10860 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10861 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10862 self.op.direction, errors.ECODE_INVAL)
10864 def Exec(self, feedback_fn):
10865 """Run the allocator test.
10868 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10869 ial = IAllocator(self.cfg, self.rpc,
10872 mem_size=self.op.mem_size,
10873 disks=self.op.disks,
10874 disk_template=self.op.disk_template,
10878 vcpus=self.op.vcpus,
10879 hypervisor=self.op.hypervisor,
10881 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10882 ial = IAllocator(self.cfg, self.rpc,
10885 relocate_from=list(self.relocate_from),
10887 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10888 ial = IAllocator(self.cfg, self.rpc,
10890 evac_nodes=self.op.evac_nodes)
10892 raise errors.ProgrammerError("Uncatched mode %s in"
10893 " LUTestAllocator.Exec", self.op.mode)
10895 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10896 result = ial.in_text
10897 else:
10898 ial.Run(self.op.allocator, validate=False)
10899 result = ial.out_text
10900 return result