4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
59 import ganeti.masterd.instance # pylint: disable-msg=W0611
# Common opcode attributes: (name, default, type-check) triples consumed by
# LogicalUnit.__init__ via the _OP_PARAMS class attribute.

#: output fields for a query operation
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))

#: the shutdown timeout
# NOTE(review): the third tuple element (the type check) appears truncated in
# this copy of the file -- verify against the original source
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,

#: the force parameter
_PForce = ("force", False, ht.TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)

#: Whether to ignore offline nodes
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)

#: a required node name (for single-node LUs)
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)

#: the migration type (live/non-live)
_PMigrationMode = ("mode", None,
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))

#: the obsolete 'live' mode (boolean)
_PMigrationLive = ("live", None, ht.TMaybeBool)
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
      they should get if not already defined, and types they must match

  """

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    # NOTE(review): the assignments of self.op/self.rpc (self.op is used
    # below) appear to be elided in this copy -- verify against the original
    self.cfg = context.cfg
    self.context = context
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    # 0 means "exclusive" for every lock level; LUs set a true value to share
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging shortcuts delegated to the processor
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # NOTE(review): initialization of self.tasklets (used by CheckPrereq and
    # Exec below) appears to be elided in this copy -- verify

    # The new kind-of-type-system: fill in defaults for missing opcode
    # parameters and validate the present ones against their type checks
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == ht.NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        # NOTE(review): the computation of 'dval' (a copy of the declared
        # default) appears to be elided in this copy -- verify
        setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == ht.NoType:
        # NOTE(review): this branch's body (skipping validation) is elided
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()

  # NOTE(review): the 'def __GetSSH(self):' header and the lazy-init guard
  # around the SshRunner construction appear to be elided in this copy
    """Returns the SshRunner object

    """
    self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, ecc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    self.needed_locks = {} # Exclusive LUs don't need locks.
    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        # NOTE(review): the per-tasklet tl.CheckPrereq() call and the
        # non-tasklet fallback branch appear to be elided in this copy

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        # NOTE(review): the tl.Exec(feedback_fn) call appears elided here
    raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-node tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable-msg=W0613,R0201
    # NOTE(review): the 'return lu_result' statement appears elided here

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    # adding an instance-level lock here would be overwritten below
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    If should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    # NOTE(review): the 'wanted_nodes = []' initialization appears elided here
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      # NOTE(review): the 'if not primary_only:' guard for the secondaries
      # appears elided here -- verify against the original
      wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    # one-shot: require an explicit mode to be set before the next call
    del self.recalculate_locks[locking.LEVEL_NODE]
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # NOTE(review): the HPATH/HTYPE = None class attributes appear to be elided
  # in this copy -- verify against the original

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """

  def __init__(self, lu):
    # NOTE(review): the constructor body (storing the owning LU) appears to
    # be elided in this copy -- verify against the original

  def CheckPrereq(self):
    """Check prerequisites for this tasklets.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  # NOTE(review): the 'if not nodes:' guard appears elided in this copy; as
  # shown the error would be raised unconditionally -- verify
  raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                               " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  # NOTE(review): the if/else selecting between the two assignments below and
  # the final return appear to be elided in this copy -- verify
  wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  wanted = utils.NiceSort(lu.cfg.GetInstanceList())
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  # work on a copy so the caller's dict is never mutated
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      # NOTE(review): the deletion of params_copy[key] and the 'else:' branch
      # marker appear to be elided in this copy -- verify
      params_copy[key] = val
  # NOTE(review): 'return params_copy' appears elided in this copy
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @raise errors.OpPrereqError: if any selected field is unknown

  """
  # NOTE(review): the construction of 'f' (a FieldSet extended with both
  # static and dynamic fields) appears to be elided in this copy -- verify
  delta = f.NonMatching(selected)
  # NOTE(review): the 'if delta:' guard appears elided here
  raise errors.OpPrereqError("Unknown output fields selected: %s"
                             % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  # NOTE(review): the 'if used_globals:' guard appears to be elided in this
  # copy; as shown the error would always be raised -- verify
  msg = ("The following hypervisor parameters are global and cannot"
         " be customized at instance level, please modify them at"
         " cluster level: %s" % utils.CommaJoin(used_globals))
  raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  # NOTE(review): the 'if msg is None:' guard appears to be elided in this
  # copy; as shown a caller-supplied msg would always be overwritten
  msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    # NOTE(review): the error-code argument of this raise appears truncated
    # in this copy -- verify against the original
    raise errors.OpPrereqError("Can't use drained node %s" % node,
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    # NOTE(review): the error-code argument of this raise appears truncated
    # in this copy -- verify against the original
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  # NOTE(review): the format arguments of the message below appear truncated
  # in this copy -- verify against the original
  result.Raise("OS '%s' not in supported OS list for node %s" %
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    # NOTE(review): the 'if prereq:'/'else:' branch headers selecting between
    # the two raises appear to be elided in this copy -- verify
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    raise errors.OpExecError(msg)
def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    # NOTE(review): the error-code argument of this raise appears truncated
    # in this copy -- verify against the original
    raise errors.OpPrereqError("File storage disabled at configure time",
def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  @raise errors.OpPrereqError: if the template is unknown

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  # file-based templates additionally require file storage to be compiled in
  if template == constants.DT_FILE:
    _RequireFileStorage()
  # NOTE(review): a trailing 'return True' may be elided in this copy
def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  @raise errors.OpPrereqError: if the storage type is unknown

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    # NOTE(review): the error-code argument of this raise appears truncated
    # in this copy -- verify against the original
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
  # file storage must additionally be compiled in
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  @return: contents of the cluster domain secret file

  """
  # NOTE(review): the second argument of this call appears truncated in this
  # copy -- verify against the original
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is neither marked up nor actually running.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance object to check
  @param reason: text appended to the error messages
  @raise errors.OpPrereqError: if the instance is marked up or running

  """
  # first the cheap configuration-level check
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  # then ask the primary node's hypervisor what is actually running
  primary = instance.primary_node
  running = lu.rpc.call_instance_list([primary],
                                      [instance.hypervisor])[primary]
  running.Raise("Can't contact node %s for instance information" % primary,
                prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  # NOTE(review): the 'full_name = fn(name)' call appears to be elided in
  # this copy -- verify against the original
  if full_name is None:
    # NOTE(review): the error-code argument of this raise and the final
    # 'return full_name' appear to be elided in this copy
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  # NOTE(review): the computation of str_status (from 'status') and the
  # opening of the 'env' dict appear to be elided in this copy -- verify
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,

  # one numbered set of variables per NIC
  nic_count = len(nics)
  for idx, (ip, mac, mode, link) in enumerate(nics):
    # NOTE(review): normalization of a None ip appears elided here
    env["INSTANCE_NIC%d_IP" % idx] = ip
    env["INSTANCE_NIC%d_MAC" % idx] = mac
    env["INSTANCE_NIC%d_MODE" % idx] = mode
    env["INSTANCE_NIC%d_LINK" % idx] = link
    # in bridged mode the link is the bridge name
    if mode == constants.NIC_MODE_BRIDGED:
      env["INSTANCE_NIC%d_BRIDGE" % idx] = link

  env["INSTANCE_NIC_COUNT"] = nic_count

  # one numbered set of variables per disk
  disk_count = len(disks)
  for idx, (size, mode) in enumerate(disks):
    env["INSTANCE_DISK%d_SIZE" % idx] = size
    env["INSTANCE_DISK%d_MODE" % idx] = mode

  env["INSTANCE_DISK_COUNT"] = disk_count

  # flatten backend and hypervisor parameters into the environment
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value
  # NOTE(review): the final 'return env' appears elided in this copy
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  cluster = lu.cfg.GetClusterInfo()
  # NOTE(review): the 'hooks_nics = []' initialization and the 'for nic in
  # nics:' loop header (with ip/mac extraction) appear elided in this copy
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  # NOTE(review): the final 'return hooks_nics' appears elided in this copy
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  # fill in cluster defaults for backend and hypervisor parameters
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  # NOTE(review): the opening of the 'args' dict appears elided in this copy
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'hypervisor_name': instance.hypervisor,
  # NOTE(review): the closing of the dict (bep/hvp entries) and the
  # 'if override:' guard appear elided in this copy -- verify
  args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  @param lu: the LU on behalf of which we make the adjustment
  @param exceptions: list of nodes to exclude from the recalculation

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  # NOTE(review): the 'if mod_list:' guard appears elided in this copy
  lu.LogInfo("Promoted nodes to master candidate role: %s",
             utils.CommaJoin(node.name for node in mod_list))
  # re-add promoted nodes so the cluster context picks up the new role
  for name in mod_list:
    lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # NOTE(review): the 'if mc_now > mc_max:' guard and the format arguments of
  # the message below appear truncated in this copy -- verify
  lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
936 def _DecideSelfPromotion(lu, exceptions=None):
937 """Decide whether I should promote myself as a master candidate.
940 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
941 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
942 # the new node will increase mc_max with one, so:
943 mc_should = min(mc_should + 1, cp_size)
944 return mc_now < mc_should
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  @param lu: the LU on behalf of which we make the check
  @param target_nics: list of NIC objects to check
  @param target_node: node on which the bridges must exist

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  # only bridged NICs reference a bridge (their link is the bridge name)
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  # NOTE(review): an 'if brlist:' guard may be elided in this copy -- verify
  result = lu.rpc.call_bridges_exist(target_node, brlist)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  # NOTE(review): the 'if node is None:' guard appears elided in this copy;
  # as shown a caller-supplied node would always be overwritten -- verify
  node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    # NOTE(review): an early 'return' appears elided in this branch -- verify
  variant = objects.OS.GetVariant(name)
  # NOTE(review): an 'if not variant:' guard and the error-code argument of
  # the raise below appear elided in this copy -- verify
  raise errors.OpPrereqError("OS name must include a variant",
  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
990 def _GetNodeInstancesInner(cfg, fn):
991 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]
  # NOTE(review): the default return for other storage types appears elided
  # in this copy -- verify against the original
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Find faulty mirrored disks of an instance on a given node.

  @param prereq: whether failures raise OpPrereqError or OpExecError

  """
  # NOTE(review): the accumulator initialization (e.g. 'faulty = []') appears
  # elided in this copy -- verify against the original
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      # NOTE(review): recording of the faulty index and the final return
      # appear elided in this copy -- verify
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    # NOTE(review): the error-code argument of this raise appears truncated
    # in this copy -- verify against the original
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    # NOTE(review): an 'else:' branch header, the tail of the message and the
    # error-code argument of the raise below appear elided in this copy
    raise errors.OpPrereqError("No iallocator or node given and no"
                               " cluster-wide default iallocator found."
                               " Please specify either an iallocator or a"
                               " node, or set a cluster-wide default"
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    Runs the post-init hooks on the master node only.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do; the work happens in the hooks.

    """
    return True
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    No pre/post hook node lists: the cluster is going away.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    # the only remaining node must be the master itself
    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      # best-effort: hook failures must not block cluster destruction
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master
def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file
  @return: a tuple of (error type or None, message or None); the error
      type is one of the LUVerifyCluster.ETYPE_* constants

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    # unparseable/unreadable certificate is a hard error
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", ht.EmptyList,
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, ht.TBool),
    ("error_codes", False, ht.TBool),
    ("debug_simulate_errors", False, ht.TBool),
    ]
  REQ_BGL = False

  # Error codes are pairs of (object type, identifier); the object type
  # determines how the "item" argument of _Error is rendered.
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # key used in **kwargs of _Error/_ErrorIf to select the severity
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
1287 def ExpandNames(self):
1288 self.needed_locks = {
1289 locking.LEVEL_NODE: locking.ALL_SET,
1290 locking.LEVEL_INSTANCE: locking.ALL_SET,
1292 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    @param ecode: one of the E* error-code tuples declared on this class
    @param item: name of the offending object (node/instance) or None
    @param msg: the message, optionally %-formatted with C{args}
    @keyword code: severity, self.ETYPE_ERROR (default) or ETYPE_WARNING

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      # machine-parseable colon-separated format
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)
1320 def _ErrorIf(self, cond, *args, **kwargs):
1321 """Log an error message if the passed condition is True.
1324 cond = bool(cond) or self.op.debug_simulate_errors
1326 self._Error(*args, **kwargs)
1327 # do not mark the operation as failed for WARN cases only
1328 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1329 self.bad = self.bad or cond
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the respose)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version; a mismatch is only a warning
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        # a non-None entry is the error message from that hypervisor
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    # the node's clock must lie within [start - skew, end + skew];
    # the reported difference is therefore only a lower bound
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      # LVM is not enabled cluster-wide, nothing to check
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      # a non-empty dict maps unreachable node -> error message
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    @param instance: the instance name
    @param instanceconfig: the instance's configuration object
    @param node_image: dict of node name to L{NodeImage}
    @param diskstatus: per-node disk status for this instance, as
        computed by L{_CollectDiskInfo}

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if (not node == node_current):
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    # flatten the per-node disk status into (node, success, status, idx)
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @param node_vol_should: dict of node name to expected volume names
    @param node_image: dict of node name to L{NodeImage}
    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
1580 def _VerifyOrphanInstances(self, instancelist, node_image):
1581 """Verify the list of running instances.
1583 This checks what instances are running but unknown to the cluster.
1586 for node, n_img in node_image.items():
1587 for o_inst in n_img.instances:
1588 test = o_inst not in instancelist
1589 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1590 "instance %s on node %s should not exist", o_inst, node)
1592 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1593 """Verify N+1 Memory Resilience.
1595 Check that if one single node dies we can still start all the
1596 instances it was primary for.
1599 for node, n_img in node_image.items():
1600 # This code checks that every node which is now listed as
1601 # secondary has enough memory to host all instances it is
1602 # supposed to should a single other node in the cluster fail.
1603 # FIXME: not ready for failover to an arbitrary node
1604 # FIXME: does not support file-backed instances
1605 # WARNING: we currently take into account down instances as well
1606 # as up ones, considering that even if they're down someone
1607 # might want to start them even in the event of a node failure.
1608 for prinode, instances in n_img.sbp.items():
1610 for instance in instances:
1611 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1612 if bep[constants.BE_AUTO_BALANCE]:
1613 needed_mem += bep[constants.BE_MEMORY]
1614 test = n_img.mfree < needed_mem
1615 self._ErrorIf(test, self.ENODEN1, node,
1616 "not enough memory on to accommodate"
1617 " failovers should peer node %s fail", prinode)
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      # master-only files are only required on master candidates
      must_have = (file_name not in master_files) or node_is_mc
      # file missing on the node
      test1 = file_name not in remote_cksum
      # file present but with a different checksum than the master's
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # file present with the correct checksum
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)
1663 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1665 """Verifies and the node DRBD status.
1667 @type ninfo: L{objects.Node}
1668 @param ninfo: the node to check
1669 @param nresult: the remote results for the node
1670 @param instanceinfo: the dict of instances
1671 @param drbd_helper: the configured DRBD usermode helper
1672 @param drbd_map: the DRBD map as returned by
1673 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1677 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1680 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1681 test = (helper_result == None)
1682 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1683 "no drbd usermode helper returned")
1685 status, payload = helper_result
1687 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1688 "drbd usermode helper check unsuccessful: %s", payload)
1689 test = status and (payload != drbd_helper)
1690 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1691 "wrong drbd usermode helper: %s", payload)
1693 # compute the DRBD minors
1695 for minor, instance in drbd_map[node].items():
1696 test = instance not in instanceinfo
1697 _ErrorIf(test, self.ECLUSTERCFG, None,
1698 "ghost instance '%s' in temporary DRBD map", instance)
1699 # ghost instance should not be running, but otherwise we
1700 # don't give double warnings (both ghost instance and
1701 # unallocated minor in use)
1703 node_drbd[minor] = (instance, False)
1705 instance = instanceinfo[instance]
1706 node_drbd[minor] = (instance.name, instance.admin_up)
1708 # and now check them
1709 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1710 test = not isinstance(used_minors, (tuple, list))
1711 _ErrorIf(test, self.ENODEDRBD, node,
1712 "cannot parse drbd status file: %s", str(used_minors))
1714 # we cannot check drbd status
1717 for minor, (iname, must_exist) in node_drbd.items():
1718 test = minor not in used_minors and must_exist
1719 _ErrorIf(test, self.ENODEDRBD, node,
1720 "drbd minor %d of instance %s is not active", minor, iname)
1721 for minor in used_minors:
1722 test = minor not in node_drbd
1723 _ErrorIf(test, self.ENODEDRBD, node,
1724 "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    # each OS entry must be a 7-element list (see unpacking below)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # assume failure until proven otherwise below
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      # LVM disabled, nothing to collect
      pass
    elif isinstance(lvdata, basestring):
      # a string payload is an error message from the node
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
1849 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1850 """Verifies and updates the node instance list.
1852 If the listing was successful, then updates this node's instance
1853 list. Otherwise, it marks the RPC call as failed for the instance
1856 @type ninfo: L{objects.Node}
1857 @param ninfo: the node to check
1858 @param nresult: the remote results for the node
1859 @param nimg: the node image object
1862 idata = nresult.get(constants.NV_INSTANCELIST, None)
1863 test = not isinstance(idata, list)
1864 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1865 " (instancelist): %s", utils.SafeEncode(str(idata)))
1867 nimg.hyp_fail = True
1869 nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(succes, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      # all instances touching this node, primary or secondary
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      # data is index-aligned with disks, so zip pairs each (inst, disk)
      # with its status
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes
2022 def Exec(self, feedback_fn):
2023 """Verify integrity of cluster, performing various test on nodes.
2027 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2028 verbose = self.op.verbose
2029 self._feedback_fn = feedback_fn
2030 feedback_fn("* Verifying global settings")
2031 for msg in self.cfg.VerifyConfig():
2032 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2034 # Check the cluster certificates
2035 for cert_filename in constants.ALL_CERT_FILES:
2036 (errcode, msg) = _VerifyCertificate(cert_filename)
2037 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2039 vg_name = self.cfg.GetVGName()
2040 drbd_helper = self.cfg.GetDRBDHelper()
2041 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2042 cluster = self.cfg.GetClusterInfo()
2043 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2044 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2045 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2046 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2047 for iname in instancelist)
2048 i_non_redundant = [] # Non redundant instances
2049 i_non_a_balanced = [] # Non auto-balanced instances
2050 n_offline = 0 # Count of offline nodes
2051 n_drained = 0 # Count of nodes being drained
2052 node_vol_should = {}
2054 # FIXME: verify OS list
2055 # do local checksums
2056 master_files = [constants.CLUSTER_CONF_FILE]
2057 master_node = self.master_node = self.cfg.GetMasterNode()
2058 master_ip = self.cfg.GetMasterIP()
2060 file_names = ssconf.SimpleStore().GetFileList()
2061 file_names.extend(constants.ALL_CERT_FILES)
2062 file_names.extend(master_files)
2063 if cluster.modify_etc_hosts:
2064 file_names.append(constants.ETC_HOSTS)
2066 local_checksums = utils.FingerprintFiles(file_names)
2068 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2069 node_verify_param = {
2070 constants.NV_FILELIST: file_names,
2071 constants.NV_NODELIST: [node.name for node in nodeinfo
2072 if not node.offline],
2073 constants.NV_HYPERVISOR: hypervisors,
2074 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2075 node.secondary_ip) for node in nodeinfo
2076 if not node.offline],
2077 constants.NV_INSTANCELIST: hypervisors,
2078 constants.NV_VERSION: None,
2079 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2080 constants.NV_NODESETUP: None,
2081 constants.NV_TIME: None,
2082 constants.NV_MASTERIP: (master_node, master_ip),
2083 constants.NV_OSLIST: None,
2084 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2087 if vg_name is not None:
2088 node_verify_param[constants.NV_VGLIST] = None
2089 node_verify_param[constants.NV_LVLIST] = vg_name
2090 node_verify_param[constants.NV_PVLIST] = [vg_name]
2091 node_verify_param[constants.NV_DRBDLIST] = None
2094 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2096 # Build our expected cluster state
2097 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2099 vm_capable=node.vm_capable))
2100 for node in nodeinfo)
2102 for instance in instancelist:
2103 inst_config = instanceinfo[instance]
2105 for nname in inst_config.all_nodes:
2106 if nname not in node_image:
2108 gnode = self.NodeImage(name=nname)
2110 node_image[nname] = gnode
2112 inst_config.MapLVsByNode(node_vol_should)
2114 pnode = inst_config.primary_node
2115 node_image[pnode].pinst.append(instance)
2117 for snode in inst_config.secondary_nodes:
2118 nimg = node_image[snode]
2119 nimg.sinst.append(instance)
2120 if pnode not in nimg.sbp:
2121 nimg.sbp[pnode] = []
2122 nimg.sbp[pnode].append(instance)
2124 # At this point, we have the in-memory data structures complete,
2125 # except for the runtime information, which we'll gather next
2127 # Due to the way our RPC system works, exact response times cannot be
2128 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2129 # time before and after executing the request, we can at least have a time
2131 nvinfo_starttime = time.time()
2132 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2133 self.cfg.GetClusterName())
2134 nvinfo_endtime = time.time()
2136 all_drbd_map = self.cfg.ComputeDRBDMap()
2138 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2139 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2141 feedback_fn("* Verifying node status")
2145 for node_i in nodeinfo:
2147 nimg = node_image[node]
2151 feedback_fn("* Skipping offline node %s" % (node,))
2155 if node == master_node:
2157 elif node_i.master_candidate:
2158 ntype = "master candidate"
2159 elif node_i.drained:
2165 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2167 msg = all_nvinfo[node].fail_msg
2168 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2170 nimg.rpc_fail = True
2173 nresult = all_nvinfo[node].payload
2175 nimg.call_ok = self._VerifyNode(node_i, nresult)
2176 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2177 self._VerifyNodeNetwork(node_i, nresult)
2178 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2182 self._VerifyNodeLVM(node_i, nresult, vg_name)
2183 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2186 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2187 self._UpdateNodeInstances(node_i, nresult, nimg)
2188 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2189 self._UpdateNodeOS(node_i, nresult, nimg)
2190 if not nimg.os_fail:
2191 if refos_img is None:
2193 self._VerifyNodeOS(node_i, nimg, refos_img)
2195 feedback_fn("* Verifying instance status")
2196 for instance in instancelist:
2198 feedback_fn("* Verifying instance %s" % instance)
2199 inst_config = instanceinfo[instance]
2200 self._VerifyInstance(instance, inst_config, node_image,
2202 inst_nodes_offline = []
2204 pnode = inst_config.primary_node
2205 pnode_img = node_image[pnode]
2206 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2207 self.ENODERPC, pnode, "instance %s, connection to"
2208 " primary node failed", instance)
2210 if pnode_img.offline:
2211 inst_nodes_offline.append(pnode)
2213 # If the instance is non-redundant we cannot survive losing its primary
2214 # node, so we are not N+1 compliant. On the other hand we have no disk
2215 # templates with more than one secondary so that situation is not well
2217 # FIXME: does not support file-backed instances
2218 if not inst_config.secondary_nodes:
2219 i_non_redundant.append(instance)
2220 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2221 instance, "instance has multiple secondary nodes: %s",
2222 utils.CommaJoin(inst_config.secondary_nodes),
2223 code=self.ETYPE_WARNING)
2225 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2226 i_non_a_balanced.append(instance)
2228 for snode in inst_config.secondary_nodes:
2229 s_img = node_image[snode]
2230 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2231 "instance %s, connection to secondary node failed", instance)
2234 inst_nodes_offline.append(snode)
2236 # warn that the instance lives on offline nodes
2237 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2238 "instance lives on offline node(s) %s",
2239 utils.CommaJoin(inst_nodes_offline))
2240 # ... or ghost/non-vm_capable nodes
2241 for node in inst_config.all_nodes:
2242 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2243 "instance lives on ghost node %s", node)
2244 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2245 instance, "instance lives on non-vm_capable node %s", node)
2247 feedback_fn("* Verifying orphan volumes")
2248 reserved = utils.FieldSet(*cluster.reserved_lvs)
2249 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2251 feedback_fn("* Verifying orphan instances")
2252 self._VerifyOrphanInstances(instancelist, node_image)
2254 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2255 feedback_fn("* Verifying N+1 Memory redundancy")
2256 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2258 feedback_fn("* Other Notes")
2260 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2261 % len(i_non_redundant))
2263 if i_non_a_balanced:
2264 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2265 % len(i_non_a_balanced))
2268 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2271 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
# Post-hooks analysis for cluster-verify: turns the multi-node hooks RPC
# result into user feedback and folds per-node/per-script failures into the
# LU result via self._ErrorIf.
# NOTE(review): this listing has gaps (original line numbers skip); several
# statements are not visible here.
2275 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2276 """Analyze the post-hooks' result
2278 This method analyses the hook result, handles it, and sends some
2279 nicely-formatted feedback back to the user.
2281 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2282 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2283 @param hooks_results: the results of the multi-node hooks rpc call
2284 @param feedback_fn: function used send feedback back to the caller
2285 @param lu_result: previous Exec result
2286 @return: the new Exec result, based on the previous result
2290 # We only really run POST phase hooks, and are only interested in
2292 if phase == constants.HOOKS_PHASE_POST:
2293 # Used to change hooks' output to proper indentation
2294 indent_re = re.compile('^', re.M)
2295 feedback_fn("* Hooks Results")
2296 assert hooks_results, "invalid result from hooks"
2298 for node_name in hooks_results:
2299 res = hooks_results[node_name]
# NOTE(review): `msg` is read below but its assignment (presumably
# `msg = res.fail_msg`, original line 2300) is not visible in this
# listing — confirm against the full file.
2301 test = msg and not res.offline
2302 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2303 "Communication failure in hooks execution: %s", msg)
2304 if res.offline or msg:
2305 # No need to investigate payload if node is offline or gave an error.
2306 # override manually lu_result here as _ErrorIf only
2307 # overrides self.bad
2310 for script, hkr, output in res.payload:
2311 test = hkr == constants.HKR_FAIL
2312 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2313 "Script %s failed, output:", script)
# Re-indent the raw hook output before feeding it back to the user.
2315 output = indent_re.sub(' ', output)
2316 feedback_fn("%s" % output)
# LU implementing "gnt-cluster verify-disks": checks LV status of all
# net-mirrored (DRBD) instances across all nodes.
# NOTE(review): this listing has gaps; initializations such as `nv_dict`,
# `inst_lvs` and the final `return result` are not visible here.
2322 class LUVerifyDisks(NoHooksLU):
2323 """Verifies the cluster disks status.
2328 def ExpandNames(self):
# Shared locks over all nodes and instances — read-only verification.
2329 self.needed_locks = {
2330 locking.LEVEL_NODE: locking.ALL_SET,
2331 locking.LEVEL_INSTANCE: locking.ALL_SET,
2333 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2335 def Exec(self, feedback_fn):
2336 """Verify integrity of cluster disks.
2338 @rtype: tuple of three items
2339 @return: a tuple of (dict of node-to-node_error, list of instances
2340 which need activate-disks, dict of instance: (node, volume) for
2344 result = res_nodes, res_instances, res_missing = {}, [], {}
2346 vg_name = self.cfg.GetVGName()
2347 nodes = utils.NiceSort(self.cfg.GetNodeList())
2348 instances = [self.cfg.GetInstanceInfo(name)
2349 for name in self.cfg.GetInstanceList()]
2352 for inst in instances:
# Only running, net-mirrored (DRBD) instances are checked.
2354 if (not inst.admin_up or
2355 inst.disk_template not in constants.DTS_NET_MIRROR):
2357 inst.MapLVsByNode(inst_lvs)
2358 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2359 for node, vol_list in inst_lvs.iteritems():
2360 for vol in vol_list:
2361 nv_dict[(node, vol)] = inst
2366 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2370 node_res = node_lvs[node]
2371 if node_res.offline:
2373 msg = node_res.fail_msg
# RPC errors are only warned about and recorded, not fatal.
2375 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2376 res_nodes[node] = msg
2379 lvs = node_res.payload
2380 for lv_name, (_, _, lv_online) in lvs.items():
# Pop matched LVs so leftovers in nv_dict are the missing ones.
2381 inst = nv_dict.pop((node, lv_name), None)
2382 if (not lv_online and inst is not None
2383 and inst.name not in res_instances):
2384 res_instances.append(inst.name)
2386 # any leftover items in nv_dict are missing LVs, let's arrange the
2388 for key, inst in nv_dict.iteritems():
2389 if inst.name not in res_missing:
2390 res_missing[inst.name] = []
2391 res_missing[inst.name].append(key)
# LU implementing "gnt-cluster repair-disk-sizes": reconciles the configured
# disk sizes with the actual block-device sizes reported by the primary nodes.
# NOTE(review): this listing has gaps; `else:` branches, `changed = []` and
# several `continue` statements are not visible here.
2396 class LURepairDiskSizes(NoHooksLU):
2397 """Verifies the cluster disks sizes.
# Optional instance-name filter; empty list means "all instances".
2400 _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2403 def ExpandNames(self):
2404 if self.op.instances:
2405 self.wanted_names = []
2406 for name in self.op.instances:
2407 full_name = _ExpandInstanceName(self.cfg, name)
2408 self.wanted_names.append(full_name)
2409 self.needed_locks = {
2410 locking.LEVEL_NODE: [],
2411 locking.LEVEL_INSTANCE: self.wanted_names,
2413 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# NOTE(review): the `else:` introducing this branch (lock everything when
# no instance filter given) is not visible in this listing.
2415 self.wanted_names = None
2416 self.needed_locks = {
2417 locking.LEVEL_NODE: locking.ALL_SET,
2418 locking.LEVEL_INSTANCE: locking.ALL_SET,
2420 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2422 def DeclareLocks(self, level):
# Only primary nodes are needed: sizes are queried on the primary.
2423 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2424 self._LockInstancesNodes(primary_only=True)
2426 def CheckPrereq(self):
2427 """Check prerequisites.
2429 This only checks the optional instance list against the existing names.
2432 if self.wanted_names is None:
2433 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2435 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2436 in self.wanted_names]
2438 def _EnsureChildSizes(self, disk):
2439 """Ensure children of the disk have the needed disk size.
2441 This is valid mainly for DRBD8 and fixes an issue where the
2442 children have smaller disk size.
2444 @param disk: an L{ganeti.objects.Disk} object
2447 if disk.dev_type == constants.LD_DRBD8:
2448 assert disk.children, "Empty children for DRBD8?"
2449 fchild = disk.children[0]
2450 mismatch = fchild.size < disk.size
2452 self.LogInfo("Child disk has size %d, parent %d, fixing",
2453 fchild.size, disk.size)
2454 fchild.size = disk.size
2456 # and we recurse on this child only, not on the metadev
2457 return self._EnsureChildSizes(fchild) or mismatch
2461 def Exec(self, feedback_fn):
2462 """Verify the size of cluster disks.
2465 # TODO: check child disks too
2466 # TODO: check differences in size between primary/secondary nodes
# Group (instance, disk-index, disk) triples per primary node so each
# node is queried with a single RPC.
2468 for instance in self.wanted_instances:
2469 pnode = instance.primary_node
2470 if pnode not in per_node_disks:
2471 per_node_disks[pnode] = []
2472 for idx, disk in enumerate(instance.disks):
2473 per_node_disks[pnode].append((instance, idx, disk))
2476 for node, dskl in per_node_disks.items():
# Query on copies so SetDiskID does not mutate the config objects.
2477 newl = [v[2].Copy() for v in dskl]
2479 self.cfg.SetDiskID(dsk, node)
2480 result = self.rpc.call_blockdev_getsizes(node, newl)
2482 self.LogWarning("Failure in blockdev_getsizes call to node"
2483 " %s, ignoring", node)
2485 if len(result.data) != len(dskl):
2486 self.LogWarning("Invalid result from node %s, ignoring node results",
2489 for ((instance, idx, disk), size) in zip(dskl, result.data):
2491 self.LogWarning("Disk %d of instance %s did not return size"
2492 " information, ignoring", idx, instance.name)
2494 if not isinstance(size, (int, long)):
2495 self.LogWarning("Disk %d of instance %s did not return valid"
2496 " size information, ignoring", idx, instance.name)
2499 if size != disk.size:
# Mismatch found: record the actual size in the configuration.
2500 self.LogInfo("Disk %d of instance %s has mismatched size,"
2501 " correcting: recorded %d, actual %d", idx,
2502 instance.name, disk.size, size)
2504 self.cfg.Update(instance, feedback_fn)
2505 changed.append((instance.name, idx, size))
2506 if self._EnsureChildSizes(disk):
2507 self.cfg.Update(instance, feedback_fn)
2508 changed.append((instance.name, idx, disk.size))
# LU implementing "gnt-cluster rename": stops the master IP, rewrites the
# cluster name/IP in the configuration, redistributes ssconf/known_hosts and
# restarts the master role.
# NOTE(review): this listing has gaps; the `env = {` opener, `try/finally`
# around the config update and the final `return clustername` are not
# visible here.
2512 class LURenameCluster(LogicalUnit):
2513 """Rename the cluster.
2516 HPATH = "cluster-rename"
2517 HTYPE = constants.HTYPE_CLUSTER
2518 _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2520 def BuildHooksEnv(self):
2525 "OP_TARGET": self.cfg.GetClusterName(),
2526 "NEW_NAME": self.op.name,
2528 mn = self.cfg.GetMasterNode()
2529 all_nodes = self.cfg.GetNodeList()
2530 return env, [mn], all_nodes
2532 def CheckPrereq(self):
2533 """Verify that the passed name is a valid one.
# Resolve the new name with the cluster's primary IP family.
2536 hostname = netutils.GetHostname(name=self.op.name,
2537 family=self.cfg.GetPrimaryIPFamily())
2539 new_name = hostname.name
2540 self.ip = new_ip = hostname.ip
2541 old_name = self.cfg.GetClusterName()
2542 old_ip = self.cfg.GetMasterIP()
2543 if new_name == old_name and new_ip == old_ip:
2544 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2545 " cluster has changed",
# Refuse an IP that already answers on the noded port: it is in use.
2547 if new_ip != old_ip:
2548 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2549 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2550 " reachable on the network" %
2551 new_ip, errors.ECODE_NOTUNIQUE)
2553 self.op.name = new_name
2555 def Exec(self, feedback_fn):
2556 """Rename the cluster.
2559 clustername = self.op.name
2562 # shutdown the master IP
2563 master = self.cfg.GetMasterNode()
2564 result = self.rpc.call_node_stop_master(master, False)
2565 result.Raise("Could not disable the master role")
2568 cluster = self.cfg.GetClusterInfo()
2569 cluster.cluster_name = clustername
# NOTE(review): `ip` is read here; its assignment (presumably from
# self.ip) is not visible in this listing — confirm.
2570 cluster.master_ip = ip
2571 self.cfg.Update(cluster, feedback_fn)
2573 # update the known hosts file
2574 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2575 node_list = self.cfg.GetNodeList()
2577 node_list.remove(master)
2580 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
# Best-effort restart of the master role; failure is only a warning
# since the admin can restart it manually.
2582 result = self.rpc.call_node_start_master(master, False, False)
2583 msg = result.fail_msg
2585 self.LogWarning("Could not re-enable the master role on"
2586 " the master, please restart manually: %s", msg)
# LU implementing "gnt-cluster modify": validates and applies changes to
# cluster-wide parameters (VG name, DRBD helper, hypervisors, be/nic/os
# params, uid pool, candidate pool size, hidden/blacklisted OSes, ...).
# NOTE(review): this listing has gaps; the `_OP_PARAMS = [` opener, several
# `else:`/`if:` lines and closing brackets are not visible here.
2591 class LUSetClusterParams(LogicalUnit):
2592 """Change the parameters of the cluster.
2595 HPATH = "cluster-modify"
2596 HTYPE = constants.HTYPE_CLUSTER
2598 ("vg_name", None, ht.TMaybeString),
2599 ("enabled_hypervisors", None,
2600 ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2602 ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2604 ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2605 ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2607 ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2609 ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2610 ("uid_pool", None, ht.NoType),
2611 ("add_uids", None, ht.NoType),
2612 ("remove_uids", None, ht.NoType),
2613 ("maintain_node_health", None, ht.TMaybeBool),
2614 ("prealloc_wipe_disks", None, ht.TMaybeBool),
2615 ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2616 ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2617 ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2618 ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2619 ("hidden_os", None, ht.TOr(ht.TListOf(\
2622 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2624 ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2627 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2632 def CheckArguments(self):
# Validate the uid pool arguments early, before any locking.
2636 if self.op.uid_pool:
2637 uidpool.CheckUidPool(self.op.uid_pool)
2639 if self.op.add_uids:
2640 uidpool.CheckUidPool(self.op.add_uids)
2642 if self.op.remove_uids:
2643 uidpool.CheckUidPool(self.op.remove_uids)
2645 def ExpandNames(self):
2646 # FIXME: in the future maybe other cluster params won't require checking on
2647 # all nodes to be modified.
2648 self.needed_locks = {
2649 locking.LEVEL_NODE: locking.ALL_SET,
2651 self.share_locks[locking.LEVEL_NODE] = 1
2653 def BuildHooksEnv(self):
2658 "OP_TARGET": self.cfg.GetClusterName(),
2659 "NEW_VG_NAME": self.op.vg_name,
2661 mn = self.cfg.GetMasterNode()
2662 return env, [mn], [mn]
2664 def CheckPrereq(self):
2665 """Check prerequisites.
2667 This checks whether the given params don't conflict and
2668 if the given volume group is valid.
# Empty-but-not-None vg_name means "disable lvm storage" — only legal
# when no LV-based disks exist.
2671 if self.op.vg_name is not None and not self.op.vg_name:
2672 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2673 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2674 " instances exist", errors.ECODE_INVAL)
# Likewise, an empty drbd_helper means "disable DRBD usage".
2676 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2677 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2678 raise errors.OpPrereqError("Cannot disable drbd helper while"
2679 " drbd-based instances exist",
2682 node_list = self.acquired_locks[locking.LEVEL_NODE]
2684 # if vg_name not None, checks given volume group on all nodes
2686 vglist = self.rpc.call_vg_list(node_list)
2687 for node in node_list:
2688 msg = vglist[node].fail_msg
2690 # ignoring down node
2691 self.LogWarning("Error while gathering data on node %s"
2692 " (ignoring node): %s", node, msg)
2694 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2696 constants.MIN_VG_SIZE)
2698 raise errors.OpPrereqError("Error on node '%s': %s" %
2699 (node, vgstatus), errors.ECODE_ENVIRON)
2701 if self.op.drbd_helper:
2702 # checks given drbd helper on all nodes
2703 helpers = self.rpc.call_drbd_helper(node_list)
2704 for node in node_list:
2705 ninfo = self.cfg.GetNodeInfo(node)
2707 self.LogInfo("Not checking drbd helper on offline node %s", node)
2709 msg = helpers[node].fail_msg
2711 raise errors.OpPrereqError("Error checking drbd helper on node"
2712 " '%s': %s" % (node, msg),
2713 errors.ECODE_ENVIRON)
2714 node_helper = helpers[node].payload
2715 if node_helper != self.op.drbd_helper:
2716 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2717 (node, node_helper), errors.ECODE_ENVIRON)
2719 self.cluster = cluster = self.cfg.GetClusterInfo()
2720 # validate params changes
2721 if self.op.beparams:
2722 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2723 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2725 if self.op.nicparams:
2726 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2727 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2728 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2731 # check all instances for consistency
# NOTE(review): `nic_errors` is appended to below but its initialization
# (presumably `nic_errors = []`, original line ~2730) is not visible.
2732 for instance in self.cfg.GetAllInstancesInfo().values():
2733 for nic_idx, nic in enumerate(instance.nics):
2734 params_copy = copy.deepcopy(nic.nicparams)
2735 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2737 # check parameter syntax
2739 objects.NIC.CheckParameterSyntax(params_filled)
2740 except errors.ConfigurationError, err:
2741 nic_errors.append("Instance %s, nic/%d: %s" %
2742 (instance.name, nic_idx, err))
2744 # if we're moving instances to routed, check that they have an ip
2745 target_mode = params_filled[constants.NIC_MODE]
2746 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2747 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2748 (instance.name, nic_idx))
2750 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2751 "\n".join(nic_errors))
2753 # hypervisor list/parameters
2754 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2755 if self.op.hvparams:
2756 for hv_name, hv_dict in self.op.hvparams.items():
2757 if hv_name not in self.new_hvparams:
2758 self.new_hvparams[hv_name] = hv_dict
2760 self.new_hvparams[hv_name].update(hv_dict)
2762 # os hypervisor parameters
2763 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2765 for os_name, hvs in self.op.os_hvp.items():
2766 if os_name not in self.new_os_hvp:
2767 self.new_os_hvp[os_name] = hvs
2769 for hv_name, hv_dict in hvs.items():
2770 if hv_name not in self.new_os_hvp[os_name]:
2771 self.new_os_hvp[os_name][hv_name] = hv_dict
2773 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2776 self.new_osp = objects.FillDict(cluster.osparams, {})
2777 if self.op.osparams:
2778 for os_name, osp in self.op.osparams.items():
2779 if os_name not in self.new_osp:
2780 self.new_osp[os_name] = {}
2782 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2785 if not self.new_osp[os_name]:
2786 # we removed all parameters
2787 del self.new_osp[os_name]
2789 # check the parameter validity (remote check)
2790 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2791 os_name, self.new_osp[os_name])
2793 # changes to the hypervisor list
2794 if self.op.enabled_hypervisors is not None:
2795 self.hv_list = self.op.enabled_hypervisors
2796 for hv in self.hv_list:
2797 # if the hypervisor doesn't already exist in the cluster
2798 # hvparams, we initialize it to empty, and then (in both
2799 # cases) we make sure to fill the defaults, as we might not
2800 # have a complete defaults list if the hypervisor wasn't
2802 if hv not in new_hvp:
2804 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2805 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2807 self.hv_list = cluster.enabled_hypervisors
2809 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2810 # either the enabled list has changed, or the parameters have, validate
2811 for hv_name, hv_params in self.new_hvparams.items():
2812 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2813 (self.op.enabled_hypervisors and
2814 hv_name in self.op.enabled_hypervisors)):
2815 # either this is a new hypervisor, or its parameters have changed
2816 hv_class = hypervisor.GetHypervisor(hv_name)
2817 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2818 hv_class.CheckParameterSyntax(hv_params)
2819 _CheckHVParams(self, node_list, hv_name, hv_params)
2822 # no need to check any newly-enabled hypervisors, since the
2823 # defaults have already been checked in the above code-block
2824 for os_name, os_hvp in self.new_os_hvp.items():
2825 for hv_name, hv_params in os_hvp.items():
2826 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827 # we need to fill in the new os_hvp on top of the actual hv_p
2828 cluster_defaults = self.new_hvparams.get(hv_name, {})
2829 new_osp = objects.FillDict(cluster_defaults, hv_params)
2830 hv_class = hypervisor.GetHypervisor(hv_name)
2831 hv_class.CheckParameterSyntax(new_osp)
2832 _CheckHVParams(self, node_list, hv_name, new_osp)
2834 if self.op.default_iallocator:
2835 alloc_script = utils.FindFile(self.op.default_iallocator,
2836 constants.IALLOCATOR_SEARCH_PATH,
2838 if alloc_script is None:
2839 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2840 " specified" % self.op.default_iallocator,
2843 def Exec(self, feedback_fn):
2844 """Change the parameters of the cluster.
2847 if self.op.vg_name is not None:
2848 new_volume = self.op.vg_name
2851 if new_volume != self.cfg.GetVGName():
2852 self.cfg.SetVGName(new_volume)
2854 feedback_fn("Cluster LVM configuration already in desired"
2855 " state, not changing")
2856 if self.op.drbd_helper is not None:
2857 new_helper = self.op.drbd_helper
2860 if new_helper != self.cfg.GetDRBDHelper():
2861 self.cfg.SetDRBDHelper(new_helper)
2863 feedback_fn("Cluster DRBD helper already in desired state,"
2865 if self.op.hvparams:
2866 self.cluster.hvparams = self.new_hvparams
2868 self.cluster.os_hvp = self.new_os_hvp
2869 if self.op.enabled_hypervisors is not None:
2870 self.cluster.hvparams = self.new_hvparams
2871 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2872 if self.op.beparams:
2873 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2874 if self.op.nicparams:
2875 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2876 if self.op.osparams:
2877 self.cluster.osparams = self.new_osp
2879 if self.op.candidate_pool_size is not None:
2880 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2881 # we need to update the pool size here, otherwise the save will fail
2882 _AdjustCandidatePool(self, [])
2884 if self.op.maintain_node_health is not None:
2885 self.cluster.maintain_node_health = self.op.maintain_node_health
2887 if self.op.prealloc_wipe_disks is not None:
2888 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2890 if self.op.add_uids is not None:
2891 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2893 if self.op.remove_uids is not None:
2894 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2896 if self.op.uid_pool is not None:
2897 self.cluster.uid_pool = self.op.uid_pool
2899 if self.op.default_iallocator is not None:
2900 self.cluster.default_iallocator = self.op.default_iallocator
2902 if self.op.reserved_lvs is not None:
2903 self.cluster.reserved_lvs = self.op.reserved_lvs
# Apply add/remove modifications to a named OS list attribute of the
# cluster (used for hidden_os and blacklisted_os below).
2905 def helper_os(aname, mods, desc):
2907 lst = getattr(self.cluster, aname)
2908 for key, val in mods:
2909 if key == constants.DDM_ADD:
2911 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2914 elif key == constants.DDM_REMOVE:
2918 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2920 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2922 if self.op.hidden_os:
2923 helper_os("hidden_os", self.op.hidden_os, "hidden")
2925 if self.op.blacklisted_os:
2926 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
# Persist all accumulated changes in a single config update.
2928 self.cfg.Update(self.cluster, feedback_fn)
# Upload a single file to a list of nodes via RPC, logging (not raising) a
# warning per node on failure. Skips silently if the file does not exist
# locally.
# NOTE(review): this listing has gaps; the docstring closer and the
# `if msg:` guard before the warning (original line ~2939) are not visible.
2931 def _UploadHelper(lu, nodes, fname):
2932 """Helper for uploading a file and showing warnings.
2935 if os.path.exists(fname):
2936 result = lu.rpc.call_upload_file(nodes, fname)
2937 for to_node, to_result in result.items():
2938 msg = to_result.fail_msg
2940 msg = ("Copy of file %s to node %s failed: %s" %
2941 (fname, to_node, msg))
2942 lu.proc.LogWarning(msg)
# NOTE(review): this listing has gaps; the `vm_files = set(...)`
# initialization and the closing of the dist_files set literal are not
# visible here.
2945 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
2946 """Distribute additional files which are part of the cluster configuration.
2948 ConfigWriter takes care of distributing the config and ssconf files, but
2949 there are more files which should be distributed to all nodes. This function
2950 makes sure those are copied.
2952 @param lu: calling logical unit
2953 @param additional_nodes: list of nodes not in the config to distribute to
2954 @type additional_vm: boolean
2955 @param additional_vm: whether the additional nodes are vm-capable or not
2958 # 1. Gather target nodes
2959 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2960 dist_nodes = lu.cfg.GetOnlineNodeList()
2961 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
2962 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
2963 if additional_nodes is not None:
2964 dist_nodes.extend(additional_nodes)
2966 vm_nodes.extend(additional_nodes)
# The master (self) already has these files; don't upload to ourselves.
2967 if myself.name in dist_nodes:
2968 dist_nodes.remove(myself.name)
2969 if myself.name in vm_nodes:
2970 vm_nodes.remove(myself.name)
2972 # 2. Gather files to distribute
2973 dist_files = set([constants.ETC_HOSTS,
2974 constants.SSH_KNOWN_HOSTS_FILE,
2975 constants.RAPI_CERT_FILE,
2976 constants.RAPI_USERS_FILE,
2977 constants.CONFD_HMAC_KEY,
2978 constants.CLUSTER_DOMAIN_SECRET_FILE,
# Hypervisor-specific ancillary files only go to vm-capable nodes.
2982 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2983 for hv_name in enabled_hypervisors:
2984 hv_class = hypervisor.GetHypervisor(hv_name)
2985 vm_files.update(hv_class.GetAncillaryFiles())
2987 # 3. Perform the files upload
2988 for fname in dist_files:
2989 _UploadHelper(lu, dist_nodes, fname)
2990 for fname in vm_files:
2991 _UploadHelper(lu, vm_nodes, fname)
# LU implementing "gnt-cluster redist-conf": a no-op config Update() to
# trigger redistribution, plus the ancillary-file push.
# NOTE(review): this listing has gaps; docstring and dict closers are not
# visible here.
2994 class LURedistributeConfig(NoHooksLU):
2995 """Force the redistribution of cluster configuration.
2997 This is a very simple LU.
3002 def ExpandNames(self):
3003 self.needed_locks = {
3004 locking.LEVEL_NODE: locking.ALL_SET,
3006 self.share_locks[locking.LEVEL_NODE] = 1
3008 def Exec(self, feedback_fn):
3009 """Redistribute the configuration.
# Re-saving the (unchanged) cluster object forces ConfigWriter to push
# config/ssconf everywhere; then distribute the extra files.
3012 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3013 _RedistributeAncillaryFiles(self)
# Poll the primary node's mirror status until the instance's disks are in
# sync (or once, if oneshot). Returns True when no disk ended up degraded.
# NOTE(review): this listing has gaps; the polling loop header, `done`
# handling and the sleep/retry bookkeeping are partially not visible here.
3016 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3017 """Sleep and poll for an instance's disk to sync.
# Nothing to do for diskless instances or an explicitly empty disk list.
3020 if not instance.disks or disks is not None and not disks:
3023 disks = _ExpandCheckDisks(instance, disks)
3026 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3028 node = instance.primary_node
3031 lu.cfg.SetDiskID(dev, node)
3033 # TODO: Convert to utils.Retry
3036 degr_retries = 10 # in seconds, as we sleep 1 second each time
3040 cumul_degraded = False
3041 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3042 msg = rstats.fail_msg
3044 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
# NOTE(review): the retry-counting guard before this raise (original
# lines ~3045-3046) is not visible in this listing.
3047 raise errors.RemoteError("Can't contact node %s for mirror data,"
3048 " aborting." % node)
3051 rstats = rstats.payload
3053 for i, mstat in enumerate(rstats):
3055 lu.LogWarning("Can't compute data for node %s/%s",
3056 node, disks[i].iv_name)
3059 cumul_degraded = (cumul_degraded or
3060 (mstat.is_degraded and mstat.sync_percent is None))
3061 if mstat.sync_percent is not None:
3063 if mstat.estimated_time is not None:
3064 rem_time = ("%s remaining (estimated)" %
3065 utils.FormatSeconds(mstat.estimated_time))
3066 max_time = mstat.estimated_time
3068 rem_time = "no time estimate"
3069 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3070 (disks[i].iv_name, mstat.sync_percent, rem_time))
3072 # if we're done but degraded, let's do a few small retries, to
3073 # make sure we see a stable and not transient situation; therefore
3074 # we force restart of the loop
3075 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3076 logging.info("Degraded disks found, %d retries left", degr_retries)
# Sleep at most 60s between polls, bounded by the estimated sync time.
3084 time.sleep(min(60, max_time))
3087 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3088 return not cumul_degraded
# NOTE(review): this listing has gaps; the `result` initialization, the
# `if ldisk:`/`else:` guards and the final `return result` are not visible
# here.
3091 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3092 """Check that mirrors are not degraded.
3094 The ldisk parameter, if True, will change the test from the
3095 is_degraded attribute (which represents overall non-ok status for
3096 the device(s)) to the ldisk (representing the local storage status).
3099 lu.cfg.SetDiskID(dev, node)
3103 if on_primary or dev.AssembleOnSecondary():
3104 rstats = lu.rpc.call_blockdev_find(node, dev)
3105 msg = rstats.fail_msg
3107 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3109 elif not rstats.payload:
3110 lu.LogWarning("Can't find disk on node %s", node)
3114 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3116 result = result and not rstats.payload.is_degraded
# Recurse into children (e.g. DRBD over LVs), always as "primary" view.
3119 for child in dev.children:
3120 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
# NOTE(review): elided excerpt -- the embedded original line numbers are
# non-contiguous; missing statements are not reconstructed below.
3125 class LUDiagnoseOS(NoHooksLU):
3126 """Logical unit for OS diagnose/query.
# Opcode parameter: optional list of OS names to restrict the query to.
3131 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
# Field-name constant for the "blacklisted" output column.
3135 _BLK = "blacklisted"
# No static fields; everything is computed from the per-node RPC results.
3137 _FIELDS_STATIC = utils.FieldSet()
3138 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3139 "parameters", "api_versions", _HID, _BLK)
3141 def CheckArguments(self):
# Selecting individual OSes by name is not implemented for this query.
3143 raise errors.OpPrereqError("Selective OS query not supported",
3146 _CheckOutputFields(static=self._FIELDS_STATIC,
3147 dynamic=self._FIELDS_DYNAMIC,
3148 selected=self.op.output_fields)
3150 def ExpandNames(self):
3151 # Lock all nodes, in shared mode
3152 # Temporary removal of locks, should be reverted later
3153 # TODO: reintroduce locks when they are lighter-weight
3154 self.needed_locks = {}
3155 #self.share_locks[locking.LEVEL_NODE] = 1
3156 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3159 def _DiagnoseByOS(rlist):
3160 """Remaps a per-node return list into a per-os per-node dictionary
3162 @param rlist: a map with node names as keys and OS objects as values
3165 @return: a dictionary with osnames as keys and as value another
3166 map, with nodes as keys and tuples of (path, status, diagnose,
3167 variants, parameters, api_versions) as values, eg::
3169 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3170 (/srv/..., False, "invalid api")],
3171 "node2": [(/srv/..., True, "", [], [])]}
3176 # we build here the list of nodes that didn't fail the RPC (at RPC
3177 # level), so that nodes with a non-responding node daemon don't
3178 # make all OSes invalid
3179 good_nodes = [node_name for node_name in rlist
3180 if not rlist[node_name].fail_msg]
3181 for node_name, nr in rlist.items():
# Skip nodes whose RPC failed or which reported no OSes at all.
3182 if nr.fail_msg or not nr.payload:
3184 for (name, path, status, diagnose, variants,
3185 params, api_versions) in nr.payload:
3186 if name not in all_os:
3187 # build a list of nodes for this os containing empty lists
3188 # for each node in node_list
3190 for nname in good_nodes:
3191 all_os[name][nname] = []
3192 # convert params from [name, help] to (name, help)
3193 params = [tuple(v) for v in params]
3194 all_os[name][node_name].append((path, status, diagnose,
3195 variants, params, api_versions))
3198 def Exec(self, feedback_fn):
3199 """Compute the list of OSes.
# Query only online nodes; offline ones cannot answer the RPC anyway.
3202 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3203 node_data = self.rpc.call_os_diagnose(valid_nodes)
3204 pol = self._DiagnoseByOS(node_data)
3206 cluster = self.cfg.GetClusterInfo()
3208 for os_name in utils.NiceSort(pol.keys()):
3209 os_data = pol[os_name]
# null_state aliases the same three empty sets used when an OS entry is
# invalid on some node.
3212 (variants, params, api_versions) = null_state = (set(), set(), set())
3213 for idx, osl in enumerate(os_data.values()):
# An OS is valid only if every node reports its first entry as valid.
3214 valid = bool(valid and osl and osl[0][1])
3216 (variants, params, api_versions) = null_state
3218 node_variants, node_params, node_api = osl[0][3:6]
3219 if idx == 0: # first entry
3220 variants = set(node_variants)
3221 params = set(node_params)
3222 api_versions = set(node_api)
3223 else: # keep consistency
# Only keep variants/params/API versions common to all nodes.
3224 variants.intersection_update(node_variants)
3225 params.intersection_update(node_params)
3226 api_versions.intersection_update(node_api)
3228 is_hid = os_name in cluster.hidden_os
3229 is_blk = os_name in cluster.blacklisted_os
# Hidden/blacklisted/invalid OSes are skipped unless the caller
# explicitly asked for the corresponding status column.
3230 if ((self._HID not in self.op.output_fields and is_hid) or
3231 (self._BLK not in self.op.output_fields and is_blk) or
3232 (self._VLD not in self.op.output_fields and not valid)):
3235 for field in self.op.output_fields:
3238 elif field == self._VLD:
3240 elif field == "node_status":
3241 # this is just a copy of the dict
3243 for node_name, nos_list in os_data.items():
3244 val[node_name] = nos_list
3245 elif field == "variants":
3246 val = utils.NiceSort(list(variants))
3247 elif field == "parameters":
3249 elif field == "api_versions":
3250 val = list(api_versions)
3251 elif field == self._HID:
3253 elif field == self._BLK:
# Unknown output field: programming error in the caller.
3256 raise errors.ParameterError(field)
# NOTE(review): elided excerpt -- embedded original line numbers are
# non-contiguous; missing statements are not reconstructed below.
3263 class LURemoveNode(LogicalUnit):
3264 """Logical unit for removing a node.
3267 HPATH = "node-remove"
3268 HTYPE = constants.HTYPE_NODE
3273 def BuildHooksEnv(self):
3276 This doesn't run on the target node in the pre phase as a failed
3277 node would then be impossible to remove.
3281 "OP_TARGET": self.op.node_name,
3282 "NODE_NAME": self.op.node_name,
# Exclude the node being removed from the hook-target node list.
3284 all_nodes = self.cfg.GetNodeList()
3286 all_nodes.remove(self.op.node_name)
3288 logging.warning("Node %s which is about to be removed not found"
3289 " in the all nodes list", self.op.node_name)
# Returns (env, pre-phase nodes, post-phase nodes).
3290 return env, all_nodes, all_nodes
3292 def CheckPrereq(self):
3293 """Check prerequisites.
3296 - the node exists in the configuration
3297 - it does not have primary or secondary instances
3298 - it's not the master
3300 Any errors are signaled by raising errors.OpPrereqError.
3303 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3304 node = self.cfg.GetNodeInfo(self.op.node_name)
3305 assert node is not None
3307 instance_list = self.cfg.GetInstanceList()
# The master node can never be removed; a failover must happen first.
3309 masternode = self.cfg.GetMasterNode()
3310 if node.name == masternode:
3311 raise errors.OpPrereqError("Node is the master node,"
3312 " you need to failover first.",
# Refuse removal while any instance still uses this node as primary
# or secondary.
3315 for instance_name in instance_list:
3316 instance = self.cfg.GetInstanceInfo(instance_name)
3317 if node.name in instance.all_nodes:
3318 raise errors.OpPrereqError("Instance %s is still running on the node,"
3319 " please remove first." % instance_name,
3321 self.op.node_name = node.name
3324 def Exec(self, feedback_fn):
3325 """Removes the node from the cluster.
3329 logging.info("Stopping the node daemon and removing configs from node %s",
3332 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3334 # Promote nodes to master candidate as needed
3335 _AdjustCandidatePool(self, exceptions=[node.name])
3336 self.context.RemoveNode(node.name)
3338 # Run post hooks on the node before it's removed
3339 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3341 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3343 # pylint: disable-msg=W0702
# Hook failures are non-fatal during removal; warn and continue.
3344 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3346 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3347 msg = result.fail_msg
3349 self.LogWarning("Errors encountered on the remote node while leaving"
3350 " the cluster: %s", msg)
3352 # Remove node from our /etc/hosts
3353 if self.cfg.GetClusterInfo().modify_etc_hosts:
3354 master_node = self.cfg.GetMasterNode()
3355 result = self.rpc.call_etc_hosts_modify(master_node,
3356 constants.ETC_HOSTS_REMOVE,
3358 result.Raise("Can't update hosts file with new host data")
# Push updated ancillary files (ssconf etc.) to the remaining nodes.
3359 _RedistributeAncillaryFiles(self)
# NOTE(review): elided excerpt -- embedded original line numbers are
# non-contiguous; missing statements are not reconstructed below.
3362 class LUQueryNodes(NoHooksLU):
3363 """Logical unit for querying nodes.
3366 # pylint: disable-msg=W0142
3369 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3370 ("use_locking", False, ht.TBool),
# Fields that map 1:1 onto attributes of the node config object.
3374 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3375 "master_candidate", "offline", "drained",
3376 "master_capable", "vm_capable"]
# Dynamic fields require a live node_info RPC to each node.
3378 _FIELDS_DYNAMIC = utils.FieldSet(
3380 "mtotal", "mnode", "mfree",
3382 "ctotal", "cnodes", "csockets",
# Static fields are answered from the configuration alone.
3385 _FIELDS_STATIC = utils.FieldSet(*[
3386 "pinst_cnt", "sinst_cnt",
3387 "pinst_list", "sinst_list",
3388 "pip", "sip", "tags",
3390 "role"] + _SIMPLE_FIELDS
3393 def CheckArguments(self):
3394 _CheckOutputFields(static=self._FIELDS_STATIC,
3395 dynamic=self._FIELDS_DYNAMIC,
3396 selected=self.op.output_fields)
3398 def ExpandNames(self):
3399 self.needed_locks = {}
3400 self.share_locks[locking.LEVEL_NODE] = 1
3403 self.wanted = _GetWantedNodes(self, self.op.names)
3405 self.wanted = locking.ALL_SET
# Node locks are only needed when dynamic (live) fields were requested
# and the caller asked for locking.
3407 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3408 self.do_locking = self.do_node_query and self.op.use_locking
3410 # if we don't request only static fields, we need to lock the nodes
3411 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3413 def Exec(self, feedback_fn):
3414 """Computes the list of nodes and their attributes.
3417 all_info = self.cfg.GetAllNodesInfo()
# With locking, the acquired lock set is authoritative; otherwise use
# the explicitly requested names (and detect races), or all nodes.
3419 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3420 elif self.wanted != locking.ALL_SET:
3421 nodenames = self.wanted
3422 missing = set(nodenames).difference(all_info.keys())
3424 raise errors.OpExecError(
3425 "Some nodes were removed before retrieving their data: %s" % missing)
3427 nodenames = all_info.keys()
3429 nodenames = utils.NiceSort(nodenames)
3430 nodelist = [all_info[name] for name in nodenames]
3432 # begin data gathering
3434 if self.do_node_query:
# Live data: one node_info RPC fan-out, results converted field by field.
3436 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3437 self.cfg.GetHypervisorType())
3438 for name in nodenames:
3439 nodeinfo = node_data[name]
3440 if not nodeinfo.fail_msg and nodeinfo.payload:
3441 nodeinfo = nodeinfo.payload
3442 fn = utils.TryConvert
3444 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3445 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3446 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3447 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3448 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3449 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3450 "bootid": nodeinfo.get('bootid', None),
3451 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3452 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
# Failed node: empty live-data dict so field lookups return None.
3455 live_data[name] = {}
# NOTE(review): dict.fromkeys with a mutable default shares ONE dict
# across all keys; safe only as long as these values are never mutated
# (the visible code only reads them) -- confirm in the full source.
3457 live_data = dict.fromkeys(nodenames, {})
3459 node_to_primary = dict([(name, set()) for name in nodenames])
3460 node_to_secondary = dict([(name, set()) for name in nodenames])
# Instance maps are only built when instance-related fields were asked for.
3462 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3463 "sinst_cnt", "sinst_list"))
3464 if inst_fields & frozenset(self.op.output_fields):
3465 inst_data = self.cfg.GetAllInstancesInfo()
3467 for inst in inst_data.values():
3468 if inst.primary_node in node_to_primary:
3469 node_to_primary[inst.primary_node].add(inst.name)
3470 for secnode in inst.secondary_nodes:
3471 if secnode in node_to_secondary:
3472 node_to_secondary[secnode].add(inst.name)
3474 master_node = self.cfg.GetMasterNode()
3476 # end data gathering
# Build one output row per node, one value per requested field.
3479 for node in nodelist:
3481 for field in self.op.output_fields:
3482 if field in self._SIMPLE_FIELDS:
3483 val = getattr(node, field)
3484 elif field == "pinst_list":
3485 val = list(node_to_primary[node.name])
3486 elif field == "sinst_list":
3487 val = list(node_to_secondary[node.name])
3488 elif field == "pinst_cnt":
3489 val = len(node_to_primary[node.name])
3490 elif field == "sinst_cnt":
3491 val = len(node_to_secondary[node.name])
3492 elif field == "pip":
3493 val = node.primary_ip
3494 elif field == "sip":
3495 val = node.secondary_ip
3496 elif field == "tags":
3497 val = list(node.GetTags())
3498 elif field == "master":
3499 val = node.name == master_node
3500 elif self._FIELDS_DYNAMIC.Matches(field):
3501 val = live_data[node.name].get(field, None)
3502 elif field == "role":
3503 if node.name == master_node:
3505 elif node.master_candidate:
# Unknown output field: programming error in the caller.
3514 raise errors.ParameterError(field)
3515 node_output.append(val)
3516 output.append(node_output)
# NOTE(review): elided excerpt -- embedded original line numbers are
# non-contiguous; missing statements are not reconstructed below.
3521 class LUQueryNodeVolumes(NoHooksLU):
3522 """Logical unit for getting volumes on node(s).
3526 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3527 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3530 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3531 _FIELDS_STATIC = utils.FieldSet("node")
3533 def CheckArguments(self):
3534 _CheckOutputFields(static=self._FIELDS_STATIC,
3535 dynamic=self._FIELDS_DYNAMIC,
3536 selected=self.op.output_fields)
3538 def ExpandNames(self):
3539 self.needed_locks = {}
3540 self.share_locks[locking.LEVEL_NODE] = 1
# No explicit node list means "query every node in the cluster".
3541 if not self.op.nodes:
3542 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3544 self.needed_locks[locking.LEVEL_NODE] = \
3545 _GetWantedNodes(self, self.op.nodes)
3547 def Exec(self, feedback_fn):
3548 """Computes the list of volumes on the nodes and their attributes.
3551 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3552 volumes = self.rpc.call_node_volumes(nodenames)
# Precompute the per-instance, per-node LV map so the "instance" column
# can be resolved by reverse lookup.
3554 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3555 in self.cfg.GetInstanceList()]
3557 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3560 for node in nodenames:
3561 nresult = volumes[node]
# Per-node RPC failures are non-fatal: warn and skip the node.
3564 msg = nresult.fail_msg
3566 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
# Copy before sorting so the RPC payload itself is left untouched.
3569 node_vols = nresult.payload[:]
3570 node_vols.sort(key=lambda vol: vol['dev'])
3572 for vol in node_vols:
3574 for field in self.op.output_fields:
3577 elif field == "phys":
3581 elif field == "name":
3583 elif field == "size":
3584 val = int(float(vol['size']))
3585 elif field == "instance":
# Reverse lookup: find the instance owning this LV on this node.
3587 if node not in lv_by_node[inst]:
3589 if vol['name'] in lv_by_node[inst][node]:
# Unknown output field: programming error in the caller.
3595 raise errors.ParameterError(field)
3596 node_output.append(str(val))
3598 output.append(node_output)
# NOTE(review): elided excerpt -- embedded original line numbers are
# non-contiguous; missing statements are not reconstructed below.
3603 class LUQueryNodeStorage(NoHooksLU):
3604 """Logical unit for getting information on storage units on node(s).
3607 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3609 ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3610 ("storage_type", ht.NoDefault, _CheckStorageType),
3611 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3612 ("name", None, ht.TMaybeString),
3616 def CheckArguments(self):
3617 _CheckOutputFields(static=self._FIELDS_STATIC,
3618 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3619 selected=self.op.output_fields)
3621 def ExpandNames(self):
3622 self.needed_locks = {}
3623 self.share_locks[locking.LEVEL_NODE] = 1
3626 self.needed_locks[locking.LEVEL_NODE] = \
3627 _GetWantedNodes(self, self.op.nodes)
3629 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3631 def Exec(self, feedback_fn):
3632 """Computes the list of storage units on the nodes and their attributes.
3635 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3637 # Always get name to sort by
3638 if constants.SF_NAME in self.op.output_fields:
3639 fields = self.op.output_fields[:]
3641 fields = [constants.SF_NAME] + self.op.output_fields
3643 # Never ask for node or type as it's only known to the LU
3644 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3645 while extra in fields:
3646 fields.remove(extra)
# Map field name -> column index in the RPC result rows.
3648 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3649 name_idx = field_idx[constants.SF_NAME]
3651 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3652 data = self.rpc.call_storage_list(self.nodes,
3653 self.op.storage_type, st_args,
3654 self.op.name, fields)
3658 for node in utils.NiceSort(self.nodes):
3659 nresult = data[node]
# Per-node RPC failures are non-fatal: warn and skip the node.
3663 msg = nresult.fail_msg
3665 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
# Index rows by storage-unit name so output can be name-sorted.
3668 rows = dict([(row[name_idx], row) for row in nresult.payload])
3670 for name in utils.NiceSort(rows.keys()):
3675 for field in self.op.output_fields:
# Node and type columns are synthesized by the LU, not by the RPC.
3676 if field == constants.SF_NODE:
3678 elif field == constants.SF_TYPE:
3679 val = self.op.storage_type
3680 elif field in field_idx:
3681 val = row[field_idx[field]]
# Unknown output field: programming error in the caller.
3683 raise errors.ParameterError(field)
# NOTE(review): elided excerpt -- embedded original line numbers are
# non-contiguous; missing statements are not reconstructed below.
3692 class LUModifyNodeStorage(NoHooksLU):
3693 """Logical unit for modifying a storage volume on a node.
3698 ("storage_type", ht.NoDefault, _CheckStorageType),
3699 ("name", ht.NoDefault, ht.TNonEmptyString),
3700 ("changes", ht.NoDefault, ht.TDict),
3704 def CheckArguments(self):
3705 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3707 storage_type = self.op.storage_type
# Only storage types listed in MODIFIABLE_STORAGE_FIELDS can be changed.
3710 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3712 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3713 " modified" % storage_type,
# Reject any requested change to a field that is not modifiable.
3716 diff = set(self.op.changes.keys()) - modifiable
3718 raise errors.OpPrereqError("The following fields can not be modified for"
3719 " storage units of type '%s': %r" %
3720 (storage_type, list(diff)),
3723 def ExpandNames(self):
3724 self.needed_locks = {
3725 locking.LEVEL_NODE: self.op.node_name,
3728 def Exec(self, feedback_fn):
3729 """Applies the requested changes to the storage unit on the node.
3732 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3733 result = self.rpc.call_storage_modify(self.op.node_name,
3734 self.op.storage_type, st_args,
3735 self.op.name, self.op.changes)
3736 result.Raise("Failed to modify storage unit '%s' on %s" %
3737 (self.op.name, self.op.node_name))
# NOTE(review): elided excerpt -- embedded original line numbers are
# non-contiguous; missing statements are not reconstructed below.
3740 class LUAddNode(LogicalUnit):
3741 """Logical unit for adding node to the cluster.
3745 HTYPE = constants.HTYPE_NODE
3748 ("primary_ip", None, ht.NoType),
3749 ("secondary_ip", None, ht.TMaybeString),
3750 ("readd", False, ht.TBool),
3751 ("group", None, ht.TMaybeString),
3752 ("master_capable", None, ht.TMaybeBool),
3753 ("vm_capable", None, ht.TMaybeBool),
# Node flags that are copied between opcode and node object as a group.
3755 _NFLAGS = ["master_capable", "vm_capable"]
3757 def CheckArguments(self):
3758 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3759 # validate/normalize the node name
3760 self.hostname = netutils.GetHostname(name=self.op.node_name,
3761 family=self.primary_ip_family)
3762 self.op.node_name = self.hostname.name
# A re-added node keeps its existing group; passing one is an error.
3763 if self.op.readd and self.op.group:
3764 raise errors.OpPrereqError("Cannot pass a node group when a node is"
3765 " being readded", errors.ECODE_INVAL)
3767 def BuildHooksEnv(self):
3770 This will run on all nodes before, and on all nodes + the new node after.
3774 "OP_TARGET": self.op.node_name,
3775 "NODE_NAME": self.op.node_name,
3776 "NODE_PIP": self.op.primary_ip,
3777 "NODE_SIP": self.op.secondary_ip,
3778 "MASTER_CAPABLE": str(self.op.master_capable),
3779 "VM_CAPABLE": str(self.op.vm_capable),
3781 nodes_0 = self.cfg.GetNodeList()
3782 nodes_1 = nodes_0 + [self.op.node_name, ]
# (env, pre-phase nodes, post-phase nodes incl. the new node).
3783 return env, nodes_0, nodes_1
3785 def CheckPrereq(self):
3786 """Check prerequisites.
3789 - the new node is not already in the config
3791 - its parameters (single/dual homed) matches the cluster
3793 Any errors are signaled by raising errors.OpPrereqError.
3797 hostname = self.hostname
3798 node = hostname.name
3799 primary_ip = self.op.primary_ip = hostname.ip
# Default the secondary IP to the primary one (single-homed setup);
# impossible for IPv6 primaries since secondaries must be IPv4.
3800 if self.op.secondary_ip is None:
3801 if self.primary_ip_family == netutils.IP6Address.family:
3802 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3803 " IPv4 address must be given as secondary",
3805 self.op.secondary_ip = primary_ip
3807 secondary_ip = self.op.secondary_ip
3808 if not netutils.IP4Address.IsValid(secondary_ip):
3809 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3810 " address" % secondary_ip, errors.ECODE_INVAL)
3812 node_list = cfg.GetNodeList()
3813 if not self.op.readd and node in node_list:
3814 raise errors.OpPrereqError("Node %s is already in the configuration" %
3815 node, errors.ECODE_EXISTS)
3816 elif self.op.readd and node not in node_list:
3817 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3820 self.changed_primary_ip = False
# Cross-check the new node's IPs against every existing node.
3822 for existing_node_name in node_list:
3823 existing_node = cfg.GetNodeInfo(existing_node_name)
3825 if self.op.readd and node == existing_node_name:
# On re-add the secondary IP must not change; a changed primary
# IP is remembered and applied in Exec().
3826 if existing_node.secondary_ip != secondary_ip:
3827 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3828 " address configuration as before",
3830 if existing_node.primary_ip != primary_ip:
3831 self.changed_primary_ip = True
3835 if (existing_node.primary_ip == primary_ip or
3836 existing_node.secondary_ip == primary_ip or
3837 existing_node.primary_ip == secondary_ip or
3838 existing_node.secondary_ip == secondary_ip):
3839 raise errors.OpPrereqError("New node ip address(es) conflict with"
3840 " existing node %s" % existing_node.name,
3841 errors.ECODE_NOTUNIQUE)
3843 # After this 'if' block, None is no longer a valid value for the
3844 # _capable op attributes
# Re-add: inherit unspecified flags from the stored node object;
# fresh add: unspecified flags default to True.
3846 old_node = self.cfg.GetNodeInfo(node)
3847 assert old_node is not None, "Can't retrieve locked node %s" % node
3848 for attr in self._NFLAGS:
3849 if getattr(self.op, attr) is None:
3850 setattr(self.op, attr, getattr(old_node, attr))
3852 for attr in self._NFLAGS:
3853 if getattr(self.op, attr) is None:
3854 setattr(self.op, attr, True)
3856 if self.op.readd and not self.op.vm_capable:
3857 pri, sec = cfg.GetNodeInstances(node)
3859 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
3860 " flag set to false, but it already holds"
3861 " instances" % node,
3864 # check that the type of the node (single versus dual homed) is the
3865 # same as for the master
3866 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3867 master_singlehomed = myself.secondary_ip == myself.primary_ip
3868 newbie_singlehomed = secondary_ip == primary_ip
3869 if master_singlehomed != newbie_singlehomed:
3870 if master_singlehomed:
3871 raise errors.OpPrereqError("The master has no secondary ip but the"
3872 " new node has one",
3875 raise errors.OpPrereqError("The master has a secondary ip but the"
3876 " new node doesn't have one",
3879 # checks reachability
3880 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3881 raise errors.OpPrereqError("Node not reachable by ping",
3882 errors.ECODE_ENVIRON)
3884 if not newbie_singlehomed:
3885 # check reachability from my secondary ip to newbie's secondary ip
3886 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3887 source=myself.secondary_ip):
3888 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3889 " based ping to node daemon port",
3890 errors.ECODE_ENVIRON)
# Decide whether the node should become a master candidate.
3897 if self.op.master_capable:
3898 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3900 self.master_candidate = False
# Re-add reuses the existing node object; a fresh add builds a new one.
3903 self.new_node = old_node
3905 node_group = cfg.LookupNodeGroup(self.op.group)
3906 self.new_node = objects.Node(name=node,
3907 primary_ip=primary_ip,
3908 secondary_ip=secondary_ip,
3909 master_candidate=self.master_candidate,
3910 offline=False, drained=False,
3913 def Exec(self, feedback_fn):
3914 """Adds the new node to the cluster.
3917 new_node = self.new_node
3918 node = new_node.name
3920 # for re-adds, reset the offline/drained/master-candidate flags;
3921 # we need to reset here, otherwise offline would prevent RPC calls
3922 # later in the procedure; this also means that if the re-add
3923 # fails, we are left with a non-offlined, broken node
3925 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3926 self.LogInfo("Readding a node, the offline/drained flags were reset")
3927 # if we demote the node, we do cleanup later in the procedure
3928 new_node.master_candidate = self.master_candidate
3929 if self.changed_primary_ip:
3930 new_node.primary_ip = self.op.primary_ip
3932 # copy the master/vm_capable flags
3933 for attr in self._NFLAGS:
3934 setattr(new_node, attr, getattr(self.op, attr))
3936 # notify the user about any possible mc promotion
3937 if new_node.master_candidate:
3938 self.LogInfo("Node will be a master candidate")
3940 # check connectivity
3941 result = self.rpc.call_version([node])[node]
3942 result.Raise("Can't get version information from node %s" % node)
3943 if constants.PROTOCOL_VERSION == result.payload:
3944 logging.info("Communication to node %s fine, sw version %s match",
3945 node, result.payload)
3947 raise errors.OpExecError("Version mismatch master version %s,"
3948 " node version %s" %
3949 (constants.PROTOCOL_VERSION, result.payload))
3951 # Add node to our /etc/hosts, and add key to known_hosts
3952 if self.cfg.GetClusterInfo().modify_etc_hosts:
3953 master_node = self.cfg.GetMasterNode()
3954 result = self.rpc.call_etc_hosts_modify(master_node,
3955 constants.ETC_HOSTS_ADD,
3958 result.Raise("Can't update hosts file with new host data")
3960 if new_node.secondary_ip != new_node.primary_ip:
3961 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
# Verify ssh/hostname connectivity from the master to the new node.
3964 node_verify_list = [self.cfg.GetMasterNode()]
3965 node_verify_param = {
3966 constants.NV_NODELIST: [node],
3967 # TODO: do a node-net-test as well?
3970 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3971 self.cfg.GetClusterName())
3972 for verifier in node_verify_list:
3973 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3974 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3976 for failed in nl_payload:
3977 feedback_fn("ssh/hostname verification failed"
3978 " (checking from %s): %s" %
3979 (verifier, nl_payload[failed]))
3980 raise errors.OpExecError("ssh/hostname verification failed.")
# Re-add path: refresh serving data and push the updated config.
3983 _RedistributeAncillaryFiles(self)
3984 self.context.ReaddNode(new_node)
3985 # make sure we redistribute the config
3986 self.cfg.Update(new_node, feedback_fn)
3987 # and make sure the new node will not have old files around
3988 if not new_node.master_candidate:
3989 result = self.rpc.call_node_demote_from_mc(new_node.name)
3990 msg = result.fail_msg
3992 self.LogWarning("Node failed to demote itself from master"
3993 " candidate status: %s" % msg)
# Fresh-add path: distribute files to the new node too, then register it.
3995 _RedistributeAncillaryFiles(self, additional_nodes=[node],
3996 additional_vm=self.op.vm_capable)
3997 self.context.AddNode(new_node, self.proc.GetECId())
4000 class LUSetNodeParams(LogicalUnit):
4001 """Modifies the parameters of a node.
4003 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4004 to the node role (as _ROLE_*)
4005 @cvar _R2F: a dictionary from node role to tuples of flags
4006 @cvar _FLAGS: a list of attribute names corresponding to the flags
4009 HPATH = "node-modify"
4010 HTYPE = constants.HTYPE_NODE
4013 ("master_candidate", None, ht.TMaybeBool),
4014 ("offline", None, ht.TMaybeBool),
4015 ("drained", None, ht.TMaybeBool),
4016 ("auto_promote", False, ht.TBool),
4017 ("master_capable", None, ht.TMaybeBool),
4018 ("vm_capable", None, ht.TMaybeBool),
4019 ("secondary_ip", None, ht.TMaybeString),
4023 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4025 (True, False, False): _ROLE_CANDIDATE,
4026 (False, True, False): _ROLE_DRAINED,
4027 (False, False, True): _ROLE_OFFLINE,
4028 (False, False, False): _ROLE_REGULAR,
4030 _R2F = dict((v, k) for k, v in _F2R.items())
4031 _FLAGS = ["master_candidate", "drained", "offline"]
4033 def CheckArguments(self):
# NOTE(review): elided excerpt -- embedded original line numbers are
# non-contiguous; missing statements are not reconstructed below.
# Expand a possibly-short node name to the full configured name.
4034 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4035 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4036 self.op.master_capable, self.op.vm_capable,
4037 self.op.secondary_ip]
# At least one modification must be requested...
4038 if all_mods.count(None) == len(all_mods):
4039 raise errors.OpPrereqError("Please pass at least one modification",
# ...and the role flags (offline/drained/mc) are mutually exclusive.
4041 if all_mods.count(True) > 1:
4042 raise errors.OpPrereqError("Can't set the node into more than one"
4043 " state at the same time",
4046 # Boolean value that tells us whether we might be demoting from MC
4047 self.might_demote = (self.op.master_candidate == False or
4048 self.op.offline == True or
4049 self.op.drained == True or
4050 self.op.master_capable == False)
4052 if self.op.secondary_ip:
4053 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4054 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4055 " address" % self.op.secondary_ip,
# lock_all: auto-promotion may touch any node; lock_instances: changing
# the secondary IP requires inspecting instances using this node.
4058 self.lock_all = self.op.auto_promote and self.might_demote
4059 self.lock_instances = self.op.secondary_ip is not None
4061 def ExpandNames(self):
# NOTE(review): elided excerpt -- the branch condition between these
# lines (presumably on self.lock_all) is missing from this view.
# Lock all nodes when auto-promotion may change other nodes, otherwise
# only the target node.
4063 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4065 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
# Secondary-IP changes need all instance locks (filtered in DeclareLocks).
4067 if self.lock_instances:
4068 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4070 def DeclareLocks(self, level):
4071 # If we have locked all instances, before waiting to lock nodes, release
4072 # all the ones living on nodes unrelated to the current operation.
# NOTE(review): elided excerpt -- the initialization of `instances_keep`
# is not visible in this view.
4073 if level == locking.LEVEL_NODE and self.lock_instances:
4074 instances_release = []
4076 self.affected_instances = []
4077 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4078 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4079 instance = self.context.cfg.GetInstanceInfo(instance_name)
# Keep locks only for network-mirrored instances that actually
# involve the node being modified; release everything else.
4080 i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4081 if i_mirrored and self.op.node_name in instance.all_nodes:
4082 instances_keep.append(instance_name)
4083 self.affected_instances.append(instance)
4085 instances_release.append(instance_name)
4086 if instances_release:
4087 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4088 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4090 def BuildHooksEnv(self):
# Builds the hooks environment for node modification.
4093 This runs on the master node.
# Environment reflects the requested (not yet applied) flag values.
4097 "OP_TARGET": self.op.node_name,
4098 "MASTER_CANDIDATE": str(self.op.master_candidate),
4099 "OFFLINE": str(self.op.offline),
4100 "DRAINED": str(self.op.drained),
4101 "MASTER_CAPABLE": str(self.op.master_capable),
4102 "VM_CAPABLE": str(self.op.vm_capable),
# Hook node list starts with the master (continuation elided here).
4104 nl = [self.cfg.GetMasterNode(),
4108 def CheckPrereq(self):
4109 """Check prerequisites.
4111 This only checks the instance list against the existing names.
# NOTE(review): elided excerpt -- embedded original line numbers are
# non-contiguous; missing statements are not reconstructed below.
4114 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4116 if (self.op.master_candidate is not None or
4117 self.op.drained is not None or
4118 self.op.offline is not None):
4119 # we can't change the master's node flags
4120 if self.op.node_name == self.cfg.GetMasterNode():
4121 raise errors.OpPrereqError("The master role can be changed"
4122 " only via master-failover",
4125 if self.op.master_candidate and not node.master_capable:
4126 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4127 " it a master candidate" % node.name,
# vm_capable cannot be unset while the node still hosts instances.
4130 if self.op.vm_capable == False:
4131 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4133 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4134 " the vm_capable flag" % node.name,
# Demoting a master candidate must not drop the cluster below the
# required number of candidates unless auto_promote was given.
4137 if node.master_candidate and self.might_demote and not self.lock_all:
4138 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4139 # check if after removing the current node, we're missing master
4141 (mc_remaining, mc_should, _) = \
4142 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4143 if mc_remaining < mc_should:
4144 raise errors.OpPrereqError("Not enough master candidates, please"
4145 " pass auto_promote to allow promotion",
# Map the node's current (mc, drained, offline) flags to a role.
4148 self.old_flags = old_flags = (node.master_candidate,
4149 node.drained, node.offline)
4150 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4151 self.old_role = old_role = self._F2R[old_flags]
4153 # Check for ineffective changes
4154 for attr in self._FLAGS:
4155 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4156 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4157 setattr(self.op, attr, None)
4159 # Past this point, any flag change to False means a transition
4160 # away from the respective state, as only real changes are kept
4162 # If we're being deofflined/drained, we'll MC ourself if needed
4163 if (self.op.drained == False or self.op.offline == False or
4164 (self.op.master_capable and not node.master_capable)):
4165 if _DecideSelfPromotion(self):
4166 self.op.master_candidate = True
4167 self.LogInfo("Auto-promoting node to master candidate")
4169 # If we're no longer master capable, we'll demote ourselves from MC
4170 if self.op.master_capable == False and node.master_candidate:
4171 self.LogInfo("Demoting from master candidate")
4172 self.op.master_candidate = False
# Compute the target role from the (now normalized) requested flags.
4175 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4176 if self.op.master_candidate:
4177 new_role = self._ROLE_CANDIDATE
4178 elif self.op.drained:
4179 new_role = self._ROLE_DRAINED
4180 elif self.op.offline:
4181 new_role = self._ROLE_OFFLINE
4182 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4183 # False is still in new flags, which means we're un-setting (the
4185 new_role = self._ROLE_REGULAR
4186 else: # no new flags, nothing, keep old role
4189 self.new_role = new_role
4191 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4192 # Trying to transition out of offline status
4193 result = self.rpc.call_version([node.name])[node.name]
4195 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4196 " to report its version: %s" %
4197 (node.name, result.fail_msg),
4200 self.LogWarning("Transitioning node from offline to online state"
4201 " without using re-add. Please make sure the node"
4204 if self.op.secondary_ip:
4205 # Ok even without locking, because this can't be changed by any LU
4206 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4207 master_singlehomed = master.secondary_ip == master.primary_ip
4208 if master_singlehomed and self.op.secondary_ip:
4209 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4210 " homed cluster", errors.ECODE_INVAL)
# Offline-node branch (condition elided): refuse when instances are
# configured to use the IP being changed.
4213 if self.affected_instances:
4214 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4215 " node has instances (%s) configured"
4216 " to use it" % self.affected_instances)
4218 # On online nodes, check that no instances are running, and that
4219 # the node has the new ip and we can reach it.
4220 for instance in self.affected_instances:
4221 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4223 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4224 if master.name != node.name:
4225 # check reachability from master secondary ip to new secondary ip
4226 if not netutils.TcpPing(self.op.secondary_ip,
4227 constants.DEFAULT_NODED_PORT,
4228 source=master.secondary_ip):
4229 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4230 " based ping to node daemon port",
4231 errors.ECODE_ENVIRON)
# NOTE(review): applies the node-parameter changes computed in
# CheckPrereq (roles, capability flags, secondary IP) and updates the
# cluster configuration.  Several lines are elided from this excerpt.
4233 def Exec(self, feedback_fn):
4238 old_role = self.old_role
4239 new_role = self.new_role
# Apply the simple boolean capability flags directly on the node object.
4243 for attr in ["master_capable", "vm_capable"]:
4244 val = getattr(self.op, attr)
4246 setattr(node, attr, val)
4247 result.append((attr, str(val)))
4249 if new_role != old_role:
4250 # Tell the node to demote itself, if no longer MC and not offline
4251 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4252 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4254 self.LogWarning("Node failed to demote itself: %s", msg)
# Translate the new role back into the (master_candidate, drained,
# offline) flag triple via the role-to-flags map.
4256 new_flags = self._R2F[new_role]
4257 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4259 result.append((desc, str(nf)))
4260 (node.master_candidate, node.drained, node.offline) = new_flags
4262 # we locked all nodes, we adjust the CP before updating this node
4264 _AdjustCandidatePool(self, [node.name])
4266 if self.op.secondary_ip:
4267 node.secondary_ip = self.op.secondary_ip
4268 result.append(("secondary_ip", self.op.secondary_ip))
4270 # this will trigger configuration file update, if needed
4271 self.cfg.Update(node, feedback_fn)
4273 # this will trigger job queue propagation or cleanup if the mc
# Exactly one of (old_role, new_role) being master-candidate means the
# node entered or left the candidate set, so re-add it to the context.
4275 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4276 self.context.ReaddNode(node)
# NOTE(review): logical unit that asks a node daemon to power-cycle its
# own host.  Deliberately takes no locks (last-resort operation) and
# refuses to powercycle the master unless force is given.
4281 class LUPowercycleNode(NoHooksLU):
4282 """Powercycles a node.
4291 def CheckArguments(self):
4292 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4293 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4294 raise errors.OpPrereqError("The node is the master and the force"
4295 " parameter was not set",
4298 def ExpandNames(self):
4299 """Locking for PowercycleNode.
4301 This is a last-resort option and shouldn't block on other
4302 jobs. Therefore, we grab no locks.
4305 self.needed_locks = {}
4307 def Exec(self, feedback_fn):
# The node daemon schedules the reboot; Raise() turns an RPC failure
# into an OpExecError.
4311 result = self.rpc.call_node_powercycle(self.op.node_name,
4312 self.cfg.GetHypervisorType())
4313 result.Raise("Failed to schedule the reboot")
4314 return result.payload
# NOTE(review): lock-less query LU returning a dict of cluster-wide
# configuration and version information.  The dict literal's opening
# line and tail (return) are elided from this excerpt.
4317 class LUQueryClusterInfo(NoHooksLU):
4318 """Query cluster configuration.
4323 def ExpandNames(self):
4324 self.needed_locks = {}
4326 def Exec(self, feedback_fn):
4327 """Return cluster config.
4330 cluster = self.cfg.GetClusterInfo()
# Restrict the per-OS hypervisor parameters to enabled hypervisors only.
4333 # Filter just for enabled hypervisors
4334 for os_name, hv_dict in cluster.os_hvp.items():
4335 os_hvp[os_name] = {}
4336 for hv_name, hv_params in hv_dict.items():
4337 if hv_name in cluster.enabled_hypervisors:
4338 os_hvp[os_name][hv_name] = hv_params
4340 # Convert ip_family to ip_version
4341 primary_ip_version = constants.IP4_VERSION
4342 if cluster.primary_ip_family == netutils.IP6Address.family:
4343 primary_ip_version = constants.IP6_VERSION
4346 "software_version": constants.RELEASE_VERSION,
4347 "protocol_version": constants.PROTOCOL_VERSION,
4348 "config_version": constants.CONFIG_VERSION,
4349 "os_api_version": max(constants.OS_API_VERSIONS),
4350 "export_version": constants.EXPORT_VERSION,
4351 "architecture": (platform.architecture()[0], platform.machine()),
4352 "name": cluster.cluster_name,
4353 "master": cluster.master_node,
4354 "default_hypervisor": cluster.enabled_hypervisors[0],
4355 "enabled_hypervisors": cluster.enabled_hypervisors,
4356 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4357 for hypervisor_name in cluster.enabled_hypervisors]),
4359 "beparams": cluster.beparams,
4360 "osparams": cluster.osparams,
4361 "nicparams": cluster.nicparams,
4362 "candidate_pool_size": cluster.candidate_pool_size,
4363 "master_netdev": cluster.master_netdev,
4364 "volume_group_name": cluster.volume_group_name,
4365 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4366 "file_storage_dir": cluster.file_storage_dir,
4367 "maintain_node_health": cluster.maintain_node_health,
4368 "ctime": cluster.ctime,
4369 "mtime": cluster.mtime,
4370 "uuid": cluster.uuid,
4371 "tags": list(cluster.GetTags()),
4372 "uid_pool": cluster.uid_pool,
4373 "default_iallocator": cluster.default_iallocator,
4374 "reserved_lvs": cluster.reserved_lvs,
4375 "primary_ip_version": primary_ip_version,
4376 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
# NOTE(review): lock-less query LU mapping each requested static field
# name to its current configuration value; unknown fields raise
# ParameterError.
4382 class LUQueryConfigValues(NoHooksLU):
4383 """Return configuration values.
4386 _OP_PARAMS = [_POutputFields]
4388 _FIELDS_DYNAMIC = utils.FieldSet()
4389 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4390 "watcher_pause", "volume_group_name")
4392 def CheckArguments(self):
# Validate the requested output fields against the declared field sets.
4393 _CheckOutputFields(static=self._FIELDS_STATIC,
4394 dynamic=self._FIELDS_DYNAMIC,
4395 selected=self.op.output_fields)
4397 def ExpandNames(self):
4398 self.needed_locks = {}
4400 def Exec(self, feedback_fn):
4401 """Dump a representation of the cluster config to the standard output.
4405 for field in self.op.output_fields:
4406 if field == "cluster_name":
4407 entry = self.cfg.GetClusterName()
4408 elif field == "master_node":
4409 entry = self.cfg.GetMasterNode()
4410 elif field == "drain_flag":
# The drain flag is represented by the existence of a file on disk.
4411 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4412 elif field == "watcher_pause":
4413 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4414 elif field == "volume_group_name":
4415 entry = self.cfg.GetVGName()
4417 raise errors.ParameterError(field)
4418 values.append(entry)
# NOTE(review): LU that assembles an instance's block devices on its
# nodes (delegating to _AssembleInstanceDisks) and fails if any device
# cannot be activated.
4422 class LUActivateInstanceDisks(NoHooksLU):
4423 """Bring up an instance's disks.
4428 ("ignore_size", False, ht.TBool),
4432 def ExpandNames(self):
4433 self._ExpandAndLockInstance()
4434 self.needed_locks[locking.LEVEL_NODE] = []
4435 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4437 def DeclareLocks(self, level):
4438 if level == locking.LEVEL_NODE:
4439 self._LockInstancesNodes()
4441 def CheckPrereq(self):
4442 """Check prerequisites.
4444 This checks that the instance is in the cluster.
4447 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4448 assert self.instance is not None, \
4449 "Cannot retrieve locked instance %s" % self.op.instance_name
4450 _CheckNodeOnline(self, self.instance.primary_node)
4452 def Exec(self, feedback_fn):
4453 """Activate the disks.
4456 disks_ok, disks_info = \
4457 _AssembleInstanceDisks(self, self.instance,
4458 ignore_size=self.op.ignore_size)
4460 raise errors.OpExecError("Cannot activate block devices")
# NOTE(review): order-sensitive two-pass disk assembly; lines are
# intentionally left untouched (several guard/loop lines are elided
# from this excerpt).
4465 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4467 """Prepare the block devices for an instance.
4469 This sets up the block devices on all nodes.
4471 @type lu: L{LogicalUnit}
4472 @param lu: the logical unit on whose behalf we execute
4473 @type instance: L{objects.Instance}
4474 @param instance: the instance for whose disks we assemble
4475 @type disks: list of L{objects.Disk} or None
4476 @param disks: which disks to assemble (or all, if None)
4477 @type ignore_secondaries: boolean
4478 @param ignore_secondaries: if true, errors on secondary nodes
4479 won't result in an error return from the function
4480 @type ignore_size: boolean
4481 @param ignore_size: if true, the current known size of the disk
4482 will not be used during the disk activation, useful for cases
4483 when the size is wrong
4484 @return: False if the operation failed, otherwise a list of
4485 (host, instance_visible_name, node_visible_name)
4486 with the mapping from node devices to instance devices
4491 iname = instance.name
4492 disks = _ExpandCheckDisks(instance, disks)
4494 # With the two passes mechanism we try to reduce the window of
4495 # opportunity for the race condition of switching DRBD to primary
4496 # before handshaking occured, but we do not eliminate it
4498 # The proper fix would be to wait (with some limits) until the
4499 # connection has been made and drbd transitions from WFConnection
4500 # into any other network-connected state (Connected, SyncTarget,
4503 # 1st pass, assemble on all nodes in secondary mode
4504 for inst_disk in disks:
4505 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# Copy before UnsetSize so the cached config object is not mutated.
4507 node_disk = node_disk.Copy()
4508 node_disk.UnsetSize()
4509 lu.cfg.SetDiskID(node_disk, node)
4510 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4511 msg = result.fail_msg
4513 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4514 " (is_primary=False, pass=1): %s",
4515 inst_disk.iv_name, node, msg)
4516 if not ignore_secondaries:
4519 # FIXME: race condition on drbd migration to primary
4521 # 2nd pass, do only the primary node
4522 for inst_disk in disks:
4525 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# Skip every node except the primary in the second pass.
4526 if node != instance.primary_node:
4529 node_disk = node_disk.Copy()
4530 node_disk.UnsetSize()
4531 lu.cfg.SetDiskID(node_disk, node)
4532 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4533 msg = result.fail_msg
4535 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4536 " (is_primary=True, pass=2): %s",
4537 inst_disk.iv_name, node, msg)
4540 dev_path = result.payload
4542 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4544 # leave the disks configured for the primary node
4545 # this is a workaround that would be fixed better by
4546 # improving the logical/physical id handling
4548 lu.cfg.SetDiskID(disk, instance.primary_node)
4550 return disks_ok, device_info
# NOTE(review): wrapper around _AssembleInstanceDisks that shuts the
# disks back down and raises on failure; the failure branch's "if" line
# is elided from this excerpt.
4553 def _StartInstanceDisks(lu, instance, force):
4554 """Start the disks of an instance.
4557 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4558 ignore_secondaries=force)
4560 _ShutdownInstanceDisks(lu, instance)
# force=None callers get no --force hint; only an explicit False does.
4561 if force is not None and not force:
4562 lu.proc.LogWarning("", hint="If the message above refers to a"
4564 " you can retry the operation using '--force'.")
4565 raise errors.OpExecError("Disk consistency error")
# NOTE(review): LU that shuts down an instance's disks after verifying
# (via _SafeShutdownInstanceDisks) that the instance is not running.
4568 class LUDeactivateInstanceDisks(NoHooksLU):
4569 """Shutdown an instance's disks.
4577 def ExpandNames(self):
4578 self._ExpandAndLockInstance()
4579 self.needed_locks[locking.LEVEL_NODE] = []
4580 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4582 def DeclareLocks(self, level):
4583 if level == locking.LEVEL_NODE:
4584 self._LockInstancesNodes()
4586 def CheckPrereq(self):
4587 """Check prerequisites.
4589 This checks that the instance is in the cluster.
4592 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4593 assert self.instance is not None, \
4594 "Cannot retrieve locked instance %s" % self.op.instance_name
4596 def Exec(self, feedback_fn):
4597 """Deactivate the disks
4600 instance = self.instance
4601 _SafeShutdownInstanceDisks(self, instance)
# NOTE(review): refuses to shut down disks of a running instance, then
# delegates to _ShutdownInstanceDisks.
4604 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4605 """Shutdown block devices of an instance.
4607 This function checks if an instance is running, before calling
4608 _ShutdownInstanceDisks.
4611 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4612 _ShutdownInstanceDisks(lu, instance, disks=disks)
# NOTE(review): normalizes a disks argument — None means all instance
# disks, otherwise the selection must be a subset of instance.disks
# (the "disks is None" guard line is elided from this excerpt).
4615 def _ExpandCheckDisks(instance, disks):
4616 """Return the instance disks selected by the disks list
4618 @type disks: list of L{objects.Disk} or None
4619 @param disks: selected disks
4620 @rtype: list of L{objects.Disk}
4621 @return: selected instance disks to act on
4625 return instance.disks
4627 if not set(disks).issubset(instance.disks):
4628 raise errors.ProgrammerError("Can only act on disks belonging to the"
# NOTE(review): best-effort shutdown of block devices on every node of
# the instance; failures are logged as warnings, and (per the
# ignore_primary logic) failures on the primary node affect the result.
4633 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4634 """Shutdown block devices of an instance.
4636 This does the shutdown on all nodes of the instance.
4638 If the ignore_primary is false, errors on the primary node are
4643 disks = _ExpandCheckDisks(instance, disks)
4646 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4647 lu.cfg.SetDiskID(top_disk, node)
4648 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4649 msg = result.fail_msg
4651 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4652 disk.iv_name, node, msg)
4653 if not ignore_primary or node != instance.primary_node:
# NOTE(review): prerequisite helper — queries a node's free memory via
# the node_info RPC and raises OpPrereqError if it is unknown or below
# the requested amount.
4658 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4659 """Checks if a node has enough free memory.
4661 This function check if a given node has the needed amount of free
4662 memory. In case the node has less memory or we cannot get the
4663 information from the node, this function raise an OpPrereqError
4666 @type lu: C{LogicalUnit}
4667 @param lu: a logical unit from which we get configuration data
4669 @param node: the node to check
4670 @type reason: C{str}
4671 @param reason: string to use in the error message
4672 @type requested: C{int}
4673 @param requested: the amount of memory in MiB to check for
4674 @type hypervisor_name: C{str}
4675 @param hypervisor_name: the hypervisor to ask for memory stats
4676 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4677 we cannot check the node
4680 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4681 nodeinfo[node].Raise("Can't get data from node %s" % node,
4682 prereq=True, ecode=errors.ECODE_ENVIRON)
# A non-int payload means the node could not report its memory.
4683 free_mem = nodeinfo[node].payload.get('memory_free', None)
4684 if not isinstance(free_mem, int):
4685 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4686 " was '%s'" % (node, free_mem),
4687 errors.ECODE_ENVIRON)
4688 if requested > free_mem:
4689 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4690 " needed %s MiB, available %s MiB" %
4691 (node, reason, requested, free_mem),
# NOTE(review): prerequisite helper — verifies every listed node has at
# least the requested free space in the default volume group, raising
# OpPrereqError otherwise.
4695 def _CheckNodesFreeDisk(lu, nodenames, requested):
4696 """Checks if nodes have enough free disk space in the default VG.
4698 This function check if all given nodes have the needed amount of
4699 free disk. In case any node has less disk or we cannot get the
4700 information from the node, this function raise an OpPrereqError
4703 @type lu: C{LogicalUnit}
4704 @param lu: a logical unit from which we get configuration data
4705 @type nodenames: C{list}
4706 @param nodenames: the list of node names to check
4707 @type requested: C{int}
4708 @param requested: the amount of disk in MiB to check for
4709 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4710 we cannot check the node
4713 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4714 lu.cfg.GetHypervisorType())
4715 for node in nodenames:
4716 info = nodeinfo[node]
4717 info.Raise("Cannot get current information from node %s" % node,
4718 prereq=True, ecode=errors.ECODE_ENVIRON)
# A non-int vg_free means the node could not report its free space.
4719 vg_free = info.payload.get("vg_free", None)
4720 if not isinstance(vg_free, int):
4721 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4722 " result was '%s'" % (node, vg_free),
4723 errors.ECODE_ENVIRON)
4724 if requested > vg_free:
4725 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4726 " required %d MiB, available %d MiB" %
4727 (node, requested, vg_free),
# NOTE(review): hooks-enabled LU that starts an instance, optionally
# overriding hv/be parameters for this start and tolerating an offline
# primary node when ignore_offline_nodes is set.
4731 class LUStartupInstance(LogicalUnit):
4732 """Starts an instance.
4735 HPATH = "instance-start"
4736 HTYPE = constants.HTYPE_INSTANCE
4740 _PIgnoreOfflineNodes,
4741 ("hvparams", ht.EmptyDict, ht.TDict),
4742 ("beparams", ht.EmptyDict, ht.TDict),
4746 def CheckArguments(self):
4748 if self.op.beparams:
4749 # fill the beparams dict
4750 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4752 def ExpandNames(self):
4753 self._ExpandAndLockInstance()
4755 def BuildHooksEnv(self):
4758 This runs on master, primary and secondary nodes of the instance.
4762 "FORCE": self.op.force,
4764 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4765 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4768 def CheckPrereq(self):
4769 """Check prerequisites.
4771 This checks that the instance is in the cluster.
4774 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4775 assert self.instance is not None, \
4776 "Cannot retrieve locked instance %s" % self.op.instance_name
4779 if self.op.hvparams:
4780 # check hypervisor parameter syntax (locally)
4781 cluster = self.cfg.GetClusterInfo()
4782 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
# Validate the overridden hvparams merged on top of the effective ones.
4783 filled_hvp = cluster.FillHV(instance)
4784 filled_hvp.update(self.op.hvparams)
4785 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4786 hv_type.CheckParameterSyntax(filled_hvp)
4787 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4789 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4791 if self.primary_offline and self.op.ignore_offline_nodes:
4792 self.proc.LogWarning("Ignoring offline primary node")
4794 if self.op.hvparams or self.op.beparams:
4795 self.proc.LogWarning("Overridden parameters are ignored")
4797 _CheckNodeOnline(self, instance.primary_node)
4799 bep = self.cfg.GetClusterInfo().FillBE(instance)
4801 # check bridges existence
4802 _CheckInstanceBridgesExist(self, instance)
4804 remote_info = self.rpc.call_instance_info(instance.primary_node,
4806 instance.hypervisor)
4807 remote_info.Raise("Error checking node %s" % instance.primary_node,
4808 prereq=True, ecode=errors.ECODE_ENVIRON)
# Only check free memory when the instance is not already running.
4809 if not remote_info.payload: # not running already
4810 _CheckNodeFreeMemory(self, instance.primary_node,
4811 "starting instance %s" % instance.name,
4812 bep[constants.BE_MEMORY], instance.hypervisor)
4814 def Exec(self, feedback_fn):
4815 """Start the instance.
4818 instance = self.instance
4819 force = self.op.force
4821 self.cfg.MarkInstanceUp(instance.name)
4823 if self.primary_offline:
4824 assert self.op.ignore_offline_nodes
4825 self.proc.LogInfo("Primary node offline, marked instance as started")
4827 node_current = instance.primary_node
4829 _StartInstanceDisks(self, instance, force)
4831 result = self.rpc.call_instance_start(node_current, instance,
4832 self.op.hvparams, self.op.beparams)
4833 msg = result.fail_msg
# On start failure, roll back by shutting the disks down again.
4835 _ShutdownInstanceDisks(self, instance)
4836 raise errors.OpExecError("Could not start instance: %s" % msg)
# NOTE(review): hooks-enabled LU that reboots an instance.  Soft/hard
# reboots are delegated to the node daemon; the full reboot path is a
# shutdown + disk cycle + fresh start.
4839 class LURebootInstance(LogicalUnit):
4840 """Reboot an instance.
4843 HPATH = "instance-reboot"
4844 HTYPE = constants.HTYPE_INSTANCE
4847 ("ignore_secondaries", False, ht.TBool),
4848 ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
4853 def ExpandNames(self):
4854 self._ExpandAndLockInstance()
4856 def BuildHooksEnv(self):
4859 This runs on master, primary and secondary nodes of the instance.
4863 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4864 "REBOOT_TYPE": self.op.reboot_type,
4865 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4867 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4868 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4871 def CheckPrereq(self):
4872 """Check prerequisites.
4874 This checks that the instance is in the cluster.
4877 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4878 assert self.instance is not None, \
4879 "Cannot retrieve locked instance %s" % self.op.instance_name
4881 _CheckNodeOnline(self, instance.primary_node)
4883 # check bridges existence
4884 _CheckInstanceBridgesExist(self, instance)
4886 def Exec(self, feedback_fn):
4887 """Reboot the instance.
4890 instance = self.instance
4891 ignore_secondaries = self.op.ignore_secondaries
4892 reboot_type = self.op.reboot_type
4894 node_current = instance.primary_node
# Soft/hard reboot: performed by the node daemon in one RPC.
4896 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4897 constants.INSTANCE_REBOOT_HARD]:
4898 for disk in instance.disks:
4899 self.cfg.SetDiskID(disk, node_current)
4900 result = self.rpc.call_instance_reboot(node_current, instance,
4902 self.op.shutdown_timeout)
4903 result.Raise("Could not reboot instance")
# Full reboot: shutdown, cycle the disks, then start again.
4905 result = self.rpc.call_instance_shutdown(node_current, instance,
4906 self.op.shutdown_timeout)
4907 result.Raise("Could not shutdown instance for full reboot")
4908 _ShutdownInstanceDisks(self, instance)
4909 _StartInstanceDisks(self, instance, ignore_secondaries)
4910 result = self.rpc.call_instance_start(node_current, instance, None, None)
4911 msg = result.fail_msg
4913 _ShutdownInstanceDisks(self, instance)
4914 raise errors.OpExecError("Could not start instance for"
4915 " full reboot: %s" % msg)
4917 self.cfg.MarkInstanceUp(instance.name)
# NOTE(review): hooks-enabled LU that stops an instance, marking it
# down in the configuration first; an offline primary is tolerated when
# ignore_offline_nodes is set.
4920 class LUShutdownInstance(LogicalUnit):
4921 """Shutdown an instance.
4924 HPATH = "instance-stop"
4925 HTYPE = constants.HTYPE_INSTANCE
4928 _PIgnoreOfflineNodes,
4929 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
4933 def ExpandNames(self):
4934 self._ExpandAndLockInstance()
4936 def BuildHooksEnv(self):
4939 This runs on master, primary and secondary nodes of the instance.
4942 env = _BuildInstanceHookEnvByObject(self, self.instance)
4943 env["TIMEOUT"] = self.op.timeout
4944 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4947 def CheckPrereq(self):
4948 """Check prerequisites.
4950 This checks that the instance is in the cluster.
4953 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4954 assert self.instance is not None, \
4955 "Cannot retrieve locked instance %s" % self.op.instance_name
4957 self.primary_offline = \
4958 self.cfg.GetNodeInfo(self.instance.primary_node).offline
4960 if self.primary_offline and self.op.ignore_offline_nodes:
4961 self.proc.LogWarning("Ignoring offline primary node")
4963 _CheckNodeOnline(self, self.instance.primary_node)
4965 def Exec(self, feedback_fn):
4966 """Shutdown the instance.
4969 instance = self.instance
4970 node_current = instance.primary_node
4971 timeout = self.op.timeout
# Mark the instance down in config before contacting the node.
4973 self.cfg.MarkInstanceDown(instance.name)
4975 if self.primary_offline:
4976 assert self.op.ignore_offline_nodes
4977 self.proc.LogInfo("Primary node offline, marked instance as stopped")
4979 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4980 msg = result.fail_msg
4982 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4984 _ShutdownInstanceDisks(self, instance)
# NOTE(review): hooks-enabled LU that re-runs the OS creation scripts
# on a stopped instance, optionally switching its OS type and OS
# parameters first.
4987 class LUReinstallInstance(LogicalUnit):
4988 """Reinstall an instance.
4991 HPATH = "instance-reinstall"
4992 HTYPE = constants.HTYPE_INSTANCE
4995 ("os_type", None, ht.TMaybeString),
4996 ("force_variant", False, ht.TBool),
4997 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
5001 def ExpandNames(self):
5002 self._ExpandAndLockInstance()
5004 def BuildHooksEnv(self):
5007 This runs on master, primary and secondary nodes of the instance.
5010 env = _BuildInstanceHookEnvByObject(self, self.instance)
5011 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5014 def CheckPrereq(self):
5015 """Check prerequisites.
5017 This checks that the instance is in the cluster and is not running.
5020 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5021 assert instance is not None, \
5022 "Cannot retrieve locked instance %s" % self.op.instance_name
# All of the instance's nodes must be online for a reinstall.
5023 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5024 " offline, cannot reinstall")
5025 for node in instance.secondary_nodes:
5026 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5027 " cannot reinstall")
5029 if instance.disk_template == constants.DT_DISKLESS:
5030 raise errors.OpPrereqError("Instance '%s' has no disks" %
5031 self.op.instance_name,
5033 _CheckInstanceDown(self, instance, "cannot reinstall")
5035 if self.op.os_type is not None:
5037 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5038 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5039 instance_os = self.op.os_type
5041 instance_os = instance.os
5043 nodelist = list(instance.all_nodes)
5045 if self.op.osparams:
5046 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5047 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5048 self.os_inst = i_osdict # the new dict (without defaults)
5052 self.instance = instance
5054 def Exec(self, feedback_fn):
5055 """Reinstall the instance.
5058 inst = self.instance
5060 if self.op.os_type is not None:
5061 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5062 inst.os = self.op.os_type
5063 # Write to configuration
5064 self.cfg.Update(inst, feedback_fn)
# Disks must be up while the OS create scripts run on the primary node.
5066 _StartInstanceDisks(self, inst, None)
5068 feedback_fn("Running the instance OS create scripts...")
5069 # FIXME: pass debug option from opcode to backend
5070 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5071 self.op.debug_level,
5072 osparams=self.os_inst)
5073 result.Raise("Could not install OS for instance %s on node %s" %
5074 (inst.name, inst.primary_node))
5076 _ShutdownInstanceDisks(self, inst)
# NOTE(review): hooks-enabled LU that re-creates selected (or all)
# disks of a stopped instance via _CreateDisks, skipping the indices
# not requested.
5079 class LURecreateInstanceDisks(LogicalUnit):
5080 """Recreate an instance's missing disks.
5083 HPATH = "instance-recreate-disks"
5084 HTYPE = constants.HTYPE_INSTANCE
5087 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
5091 def ExpandNames(self):
5092 self._ExpandAndLockInstance()
5094 def BuildHooksEnv(self):
5097 This runs on master, primary and secondary nodes of the instance.
5100 env = _BuildInstanceHookEnvByObject(self, self.instance)
5101 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5104 def CheckPrereq(self):
5105 """Check prerequisites.
5107 This checks that the instance is in the cluster and is not running.
5110 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5111 assert instance is not None, \
5112 "Cannot retrieve locked instance %s" % self.op.instance_name
5113 _CheckNodeOnline(self, instance.primary_node)
5115 if instance.disk_template == constants.DT_DISKLESS:
5116 raise errors.OpPrereqError("Instance '%s' has no disks" %
5117 self.op.instance_name, errors.ECODE_INVAL)
5118 _CheckInstanceDown(self, instance, "cannot recreate disks")
# An empty disks list means "recreate all disks".
5120 if not self.op.disks:
5121 self.op.disks = range(len(instance.disks))
5123 for idx in self.op.disks:
5124 if idx >= len(instance.disks):
5125 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5128 self.instance = instance
5130 def Exec(self, feedback_fn):
5131 """Recreate the disks.
# Build the skip list as the complement of the requested indices.
5135 for idx, _ in enumerate(self.instance.disks):
5136 if idx not in self.op.disks: # disk idx has not been passed in
5140 _CreateDisks(self, self.instance, to_skip=to_skip)
# NOTE(review): hooks-enabled LU that renames a stopped instance:
# config rename, lock swap, optional file-storage directory rename, and
# the OS rename script on the primary node.
5143 class LURenameInstance(LogicalUnit):
5144 """Rename an instance.
5147 HPATH = "instance-rename"
5148 HTYPE = constants.HTYPE_INSTANCE
5151 ("new_name", ht.NoDefault, ht.TNonEmptyString),
5152 ("ip_check", False, ht.TBool),
5153 ("name_check", True, ht.TBool),
5156 def CheckArguments(self):
# The IP check resolves via the name check, hence the dependency.
5160 if self.op.ip_check and not self.op.name_check:
5161 # TODO: make the ip check more flexible and not depend on the name check
5162 raise errors.OpPrereqError("Cannot do ip check without a name check",
5165 def BuildHooksEnv(self):
5168 This runs on master, primary and secondary nodes of the instance.
5171 env = _BuildInstanceHookEnvByObject(self, self.instance)
5172 env["INSTANCE_NEW_NAME"] = self.op.new_name
5173 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5176 def CheckPrereq(self):
5177 """Check prerequisites.
5179 This checks that the instance is in the cluster and is not running.
5182 self.op.instance_name = _ExpandInstanceName(self.cfg,
5183 self.op.instance_name)
5184 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5185 assert instance is not None
5186 _CheckNodeOnline(self, instance.primary_node)
5187 _CheckInstanceDown(self, instance, "cannot rename")
5188 self.instance = instance
5190 new_name = self.op.new_name
5191 if self.op.name_check:
# Resolve the new name and refuse it if its IP already answers.
5192 hostname = netutils.GetHostname(name=new_name)
5193 new_name = self.op.new_name = hostname.name
5194 if (self.op.ip_check and
5195 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5196 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5197 (hostname.ip, new_name),
5198 errors.ECODE_NOTUNIQUE)
5200 instance_list = self.cfg.GetInstanceList()
5201 if new_name in instance_list:
5202 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5203 new_name, errors.ECODE_EXISTS)
5205 def Exec(self, feedback_fn):
5206 """Reinstall the instance.
5209 inst = self.instance
5210 old_name = inst.name
5212 if inst.disk_template == constants.DT_FILE:
5213 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5215 self.cfg.RenameInstance(inst.name, self.op.new_name)
5216 # Change the instance lock. This is definitely safe while we hold the BGL
5217 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5218 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5220 # re-read the instance from the configuration after rename
5221 inst = self.cfg.GetInstanceInfo(self.op.new_name)
# File-based instances also need their storage directory renamed.
5223 if inst.disk_template == constants.DT_FILE:
5224 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5225 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5226 old_file_storage_dir,
5227 new_file_storage_dir)
5228 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5229 " (but the instance has been renamed in Ganeti)" %
5230 (inst.primary_node, old_file_storage_dir,
5231 new_file_storage_dir))
5233 _StartInstanceDisks(self, inst, None)
5235 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5236 old_name, self.op.debug_level)
5237 msg = result.fail_msg
# A failed OS rename script is only a warning — the config rename stands.
5239 msg = ("Could not run OS rename script for instance %s on node %s"
5240 " (but the instance has been renamed in Ganeti): %s" %
5241 (inst.name, inst.primary_node, msg))
5242 self.proc.LogWarning(msg)
5244 _ShutdownInstanceDisks(self, inst)
# NOTE(review): hooks-enabled LU that shuts down an instance (failures
# tolerated with ignore_failures) and then removes its disks and
# configuration via _RemoveInstance.
5249 class LURemoveInstance(LogicalUnit):
5250 """Remove an instance.
5253 HPATH = "instance-remove"
5254 HTYPE = constants.HTYPE_INSTANCE
5257 ("ignore_failures", False, ht.TBool),
5262 def ExpandNames(self):
5263 self._ExpandAndLockInstance()
5264 self.needed_locks[locking.LEVEL_NODE] = []
5265 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5267 def DeclareLocks(self, level):
5268 if level == locking.LEVEL_NODE:
5269 self._LockInstancesNodes()
5271 def BuildHooksEnv(self):
5274 This runs on master, primary and secondary nodes of the instance.
5277 env = _BuildInstanceHookEnvByObject(self, self.instance)
5278 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
# Post-phase hooks also run on the instance's (now former) nodes.
5279 nl = [self.cfg.GetMasterNode()]
5280 nl_post = list(self.instance.all_nodes) + nl
5281 return env, nl, nl_post
5283 def CheckPrereq(self):
5284 """Check prerequisites.
5286 This checks that the instance is in the cluster.
5289 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5290 assert self.instance is not None, \
5291 "Cannot retrieve locked instance %s" % self.op.instance_name
5293 def Exec(self, feedback_fn):
5294 """Remove the instance.
5297 instance = self.instance
5298 logging.info("Shutting down instance %s on node %s",
5299 instance.name, instance.primary_node)
5301 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5302 self.op.shutdown_timeout)
5303 msg = result.fail_msg
5305 if self.op.ignore_failures:
5306 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5308 raise errors.OpExecError("Could not shutdown instance %s on"
5310 (instance.name, instance.primary_node, msg))
5312 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
# Shared helper: delete an instance's block devices and drop it from the
# cluster configuration, finally scheduling removal of its lock.
# NOTE(review): elided extract -- docstring body and some blank/flow lines
# (orig 5317-5318, 5320, 5325, ...) are missing.
5315 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5316 """Utility function to remove an instance.
5319 logging.info("Removing block devices for instance %s", instance.name)
# If disk removal fails: abort unless the caller asked to ignore failures,
# in which case only a warning is emitted and config removal proceeds.
5321 if not _RemoveDisks(lu, instance):
5322 if not ignore_failures:
5323 raise errors.OpExecError("Can't remove instance's disks")
5324 feedback_fn("Warning: can't remove instance's disks")
5326 logging.info("Removing instance %s out of cluster config", instance.name)
5328 lu.cfg.RemoveInstance(instance.name)
# Sanity check: no other code should have already scheduled this lock for
# removal; then mark the instance lock to be dropped after the LU finishes.
5330 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5331 "Instance lock removal conflict"
5333 # Remove lock for the instance
5334 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# No-hooks LU implementing instance queries: resolves the wanted instance
# set, optionally gathers live runtime data from nodes via RPC, then builds
# one output row per instance with one value per requested field.
# NOTE(review): this extract is heavily elided (embedded line numbers jump
# throughout); many `else:`/`try:`/assignment lines between the visible
# branches are missing, so the branch structure shown here is incomplete.
5337 class LUQueryInstances(NoHooksLU):
5338 """Logical unit for querying instances.
5341 # pylint: disable-msg=W0142
# Opcode parameters (list header at orig 5342 is elided).
5343 ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
5344 ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5345 ("use_locking", False, ht.TBool),
# Fields read directly off the instance object via getattr().
5348 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5349 "serial_no", "ctime", "mtime", "uuid"]
# Static fields: answerable from config alone; regex entries describe
# per-index disk/NIC fields (e.g. "disk.size/0").  hv/... and be/... field
# names are generated from the constants (generator lines partially elided).
5350 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5352 "disk_template", "ip", "mac", "bridge",
5353 "nic_mode", "nic_link",
5354 "sda_size", "sdb_size", "vcpus", "tags",
5355 "network_port", "beparams",
5356 r"(disk)\.(size)/([0-9]+)",
5357 r"(disk)\.(sizes)", "disk_usage",
5358 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5359 r"(nic)\.(bridge)/([0-9]+)",
5360 r"(nic)\.(macs|ips|modes|links|bridges)",
5361 r"(disk|nic)\.(count)",
5362 "hvparams", "custom_hvparams",
5363 "custom_beparams", "custom_nicparams",
5364 ] + _SIMPLE_FIELDS +
5366 for name in constants.HVS_PARAMETERS
5367 if name not in constants.HVC_GLOBALS] +
5369 for name in constants.BES_PARAMETERS])
# Dynamic fields require querying the nodes (rest of the list elided).
5370 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5376 def CheckArguments(self):
5377 _CheckOutputFields(static=self._FIELDS_STATIC,
5378 dynamic=self._FIELDS_DYNAMIC,
5379 selected=self.op.output_fields)
5381 def ExpandNames(self):
5382 self.needed_locks = {}
# Shared (read) locks are sufficient for a query.
5383 self.share_locks[locking.LEVEL_INSTANCE] = 1
5384 self.share_locks[locking.LEVEL_NODE] = 1
# Wanted set: explicit names or ALL_SET (the if/else around these two lines
# is elided).
5387 self.wanted = _GetWantedInstances(self, self.op.names)
5389 self.wanted = locking.ALL_SET
# Only take locks when dynamic (node-query) fields were requested AND the
# caller opted into locking.
5391 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5392 self.do_locking = self.do_node_query and self.op.use_locking
5394 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5395 self.needed_locks[locking.LEVEL_NODE] = []
5396 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5398 def DeclareLocks(self, level):
5399 if level == locking.LEVEL_NODE and self.do_locking:
5400 self._LockInstancesNodes()
5402 def Exec(self, feedback_fn):
5403 """Computes the list of nodes and their attributes.
5406 # pylint: disable-msg=R0912
5407 # way too many branches here
5408 all_info = self.cfg.GetAllInstancesInfo()
# Resolve the final, ordered instance name list.  The do_locking branches
# inside both arms are elided; when locking, acquired locks define the set.
5409 if self.wanted == locking.ALL_SET:
5410 # caller didn't specify instance names, so ordering is not important
5412 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5414 instance_names = all_info.keys()
5415 instance_names = utils.NiceSort(instance_names)
5417 # caller did specify names, so we must keep the ordering
5419 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5421 tgt_set = all_info.keys()
5422 missing = set(self.wanted).difference(tgt_set)
5424 raise errors.OpExecError("Some instances were removed before"
5425 " retrieving their data: %s" % missing)
5426 instance_names = self.wanted
5428 instance_list = [all_info[iname] for iname in instance_names]
5430 # begin data gathering
5432 nodes = frozenset([inst.primary_node for inst in instance_list])
5433 hv_list = list(set([inst.hypervisor for inst in instance_list]))
# Live data: per-node RPC collecting running-instance info.  The loop header
# and the offline/error classification conditions (orig 5440, 5442, 5445,
# 5447-5448) are elided; off_nodes/bad_nodes/live_data initialization is not
# visible either.
5437 if self.do_node_query:
5439 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5441 result = node_data[name]
5443 # offline nodes will be in both lists
5444 off_nodes.append(name)
5446 bad_nodes.append(name)
5449 live_data.update(result.payload)
5450 # else no instance is alive
5452 live_data = dict([(name, {}) for name in instance_names])
5454 # end data gathering
5459 cluster = self.cfg.GetClusterInfo()
# Main loop: for each instance, compute the filled hv/be/nic parameter
# dicts once, then evaluate every requested field into `val`.
5460 for instance in instance_list:
5462 i_hv = cluster.FillHV(instance, skip_globals=True)
5463 i_be = cluster.FillBE(instance)
5464 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5465 for field in self.op.output_fields:
5466 st_match = self._FIELDS_STATIC.Matches(field)
5467 if field in self._SIMPLE_FIELDS:
5468 val = getattr(instance, field)
5469 elif field == "pnode":
5470 val = instance.primary_node
5471 elif field == "snodes":
5472 val = list(instance.secondary_nodes)
5473 elif field == "admin_state":
5474 val = instance.admin_up
# oper_state: actual running state (None when the node is unreachable --
# the intermediate val assignment / else line is elided).
5475 elif field == "oper_state":
5476 if instance.primary_node in bad_nodes:
5479 val = bool(live_data.get(instance.name))
# status: combined admin+oper state string; several branches mapping
# running/admin_up combinations to values (orig 5487-5496) are elided.
5480 elif field == "status":
5481 if instance.primary_node in off_nodes:
5482 val = "ERROR_nodeoffline"
5483 elif instance.primary_node in bad_nodes:
5484 val = "ERROR_nodedown"
5486 running = bool(live_data.get(instance.name))
5488 if instance.admin_up:
5493 if instance.admin_up:
5497 elif field == "oper_ram":
5498 if instance.primary_node in bad_nodes:
5500 elif instance.name in live_data:
5501 val = live_data[instance.name].get("memory", "?")
5504 elif field == "oper_vcpus":
5505 if instance.primary_node in bad_nodes:
5507 elif instance.name in live_data:
5508 val = live_data[instance.name].get("vcpus", "?")
5511 elif field == "vcpus":
5512 val = i_be[constants.BE_VCPUS]
5513 elif field == "disk_template":
5514 val = instance.disk_template
# ip/nic_mode/nic_link/mac: first-NIC convenience fields; the guarding
# `if instance.nics:` / `else: val = None` lines are elided.
5517 val = instance.nics[0].ip
5520 elif field == "nic_mode":
5522 val = i_nicp[0][constants.NIC_MODE]
5525 elif field == "nic_link":
5527 val = i_nicp[0][constants.NIC_LINK]
5530 elif field == "bridge":
5531 if (instance.nics and
5532 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5533 val = i_nicp[0][constants.NIC_LINK]
5536 elif field == "mac":
5538 val = instance.nics[0].mac
5541 elif field == "custom_nicparams":
5542 val = [nic.nicparams for nic in instance.nics]
# Legacy sda/sdb size fields: map 'a'/'b' to disk index 0/1.
5543 elif field == "sda_size" or field == "sdb_size":
5544 idx = ord(field[2]) - ord('a')
5546 val = instance.FindDisk(idx).size
5547 except errors.OpPrereqError:
5549 elif field == "disk_usage": # total disk usage per node
5550 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5551 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5552 elif field == "tags":
5553 val = list(instance.GetTags())
5554 elif field == "custom_hvparams":
5555 val = instance.hvparams # not filled!
5556 elif field == "hvparams":
5558 elif (field.startswith(HVPREFIX) and
5559 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5560 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5561 val = i_hv.get(field[len(HVPREFIX):], None)
5562 elif field == "custom_beparams":
5563 val = instance.beparams
5564 elif field == "beparams":
5566 elif (field.startswith(BEPREFIX) and
5567 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5568 val = i_be.get(field[len(BEPREFIX):], None)
# Regex-matched per-disk / per-NIC fields; groups are
# (kind, attribute[, index]).
5569 elif st_match and st_match.groups():
5570 # matches a variable list
5571 st_groups = st_match.groups()
5572 if st_groups and st_groups[0] == "disk":
5573 if st_groups[1] == "count":
5574 val = len(instance.disks)
5575 elif st_groups[1] == "sizes":
5576 val = [disk.size for disk in instance.disks]
5577 elif st_groups[1] == "size":
5579 val = instance.FindDisk(st_groups[2]).size
5580 except errors.OpPrereqError:
5583 assert False, "Unhandled disk parameter"
5584 elif st_groups[0] == "nic":
5585 if st_groups[1] == "count":
5586 val = len(instance.nics)
5587 elif st_groups[1] == "macs":
5588 val = [nic.mac for nic in instance.nics]
5589 elif st_groups[1] == "ips":
5590 val = [nic.ip for nic in instance.nics]
5591 elif st_groups[1] == "modes":
5592 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5593 elif st_groups[1] == "links":
5594 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
# bridges: only bridged NICs contribute their link; the `val = []` init
# and loop header (orig 5596-5597) are elided.
5595 elif st_groups[1] == "bridges":
5598 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5599 val.append(nicp[constants.NIC_LINK])
# Indexed NIC fields (nic.X/N); out-of-range index handling (orig 5606-5607)
# is elided.
5604 nic_idx = int(st_groups[2])
5605 if nic_idx >= len(instance.nics):
5608 if st_groups[1] == "mac":
5609 val = instance.nics[nic_idx].mac
5610 elif st_groups[1] == "ip":
5611 val = instance.nics[nic_idx].ip
5612 elif st_groups[1] == "mode":
5613 val = i_nicp[nic_idx][constants.NIC_MODE]
5614 elif st_groups[1] == "link":
5615 val = i_nicp[nic_idx][constants.NIC_LINK]
5616 elif st_groups[1] == "bridge":
5617 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5618 if nic_mode == constants.NIC_MODE_BRIDGED:
5619 val = i_nicp[nic_idx][constants.NIC_LINK]
5623 assert False, "Unhandled NIC parameter"
5625 assert False, ("Declared but unhandled variable parameter '%s'" %
5628 assert False, "Declared but unhandled parameter '%s'" % field
# LU that fails over a net-mirrored (e.g. DRBD) instance: shut it down on
# the primary, flip primary/secondary in the config, and (if admin-up)
# restart it on the old secondary.
# NOTE(review): elided extract -- docstring bodies, part of _OP_PARAMS and
# several `if msg:` / `else:` guard lines are missing (embedded line numbers
# jump).
5635 class LUFailoverInstance(LogicalUnit):
5636 """Failover an instance.
5639 HPATH = "instance-failover"
5640 HTYPE = constants.HTYPE_INSTANCE
# Opcode parameter: proceed even if disks on the target are not in sync.
5643 ("ignore_consistency", False, ht.TBool),
5648 def ExpandNames(self):
5649 self._ExpandAndLockInstance()
5650 self.needed_locks[locking.LEVEL_NODE] = []
5651 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5653 def DeclareLocks(self, level):
5654 if level == locking.LEVEL_NODE:
5655 self._LockInstancesNodes()
# Hook env describes the role swap: old primary becomes new secondary.
5657 def BuildHooksEnv(self):
5660 This runs on master, primary and secondary nodes of the instance.
5663 instance = self.instance
5664 source_node = instance.primary_node
5665 target_node = instance.secondary_nodes[0]
5667 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5668 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5669 "OLD_PRIMARY": source_node,
5670 "OLD_SECONDARY": target_node,
5671 "NEW_PRIMARY": target_node,
5672 "NEW_SECONDARY": source_node,
5674 env.update(_BuildInstanceHookEnvByObject(self, instance))
5675 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5677 nl_post.append(source_node)
5678 return env, nl, nl_post
5680 def CheckPrereq(self):
5681 """Check prerequisites.
5683 This checks that the instance is in the cluster.
5686 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5687 assert self.instance is not None, \
5688 "Cannot retrieve locked instance %s" % self.op.instance_name
5690 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Failover only makes sense for network-mirrored disk templates.
5691 if instance.disk_template not in constants.DTS_NET_MIRROR:
5692 raise errors.OpPrereqError("Instance's disk layout is not"
5693 " network mirrored, cannot failover.",
5696 secondary_nodes = instance.secondary_nodes
5697 if not secondary_nodes:
5698 raise errors.ProgrammerError("no secondary node but using "
5699 "a mirrored disk template")
5701 target_node = secondary_nodes[0]
5702 _CheckNodeOnline(self, target_node)
5703 _CheckNodeNotDrained(self, target_node)
# Memory is only needed on the target if the instance will be restarted.
5704 if instance.admin_up:
5705 # check memory requirements on the secondary node
5706 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5707 instance.name, bep[constants.BE_MEMORY],
5708 instance.hypervisor)
5710 self.LogInfo("Not checking memory on the secondary node as"
5711 " instance will not be started")
5713 # check bridge existance
5714 _CheckInstanceBridgesExist(self, instance, node=target_node)
5716 def Exec(self, feedback_fn):
5717 """Failover an instance.
5719 The failover is done by shutting it down on its present node and
5720 starting it on the secondary.
5723 instance = self.instance
5724 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5726 source_node = instance.primary_node
5727 target_node = instance.secondary_nodes[0]
# Verify the mirror is consistent on the target before destroying the
# running copy (skippable via ignore_consistency).
5729 if instance.admin_up:
5730 feedback_fn("* checking disk consistency between source and target")
5731 for dev in instance.disks:
5732 # for drbd, these are drbd over lvm
5733 if not _CheckDiskConsistency(self, dev, target_node, False):
5734 if not self.op.ignore_consistency:
5735 raise errors.OpExecError("Disk %s is degraded on target node,"
5736 " aborting failover." % dev.iv_name)
5738 feedback_fn("* not checking disk consistency as instance is not running")
5740 feedback_fn("* shutting down instance on source node")
5741 logging.info("Shutting down instance %s on node %s",
5742 instance.name, source_node)
5744 result = self.rpc.call_instance_shutdown(source_node, instance,
5745 self.op.shutdown_timeout)
5746 msg = result.fail_msg
# NOTE(review): the `if msg:` guard (orig 5747) and the `else:` before the
# raise (orig 5753) are elided; a shutdown failure is tolerated only when
# ignoring consistency or when the primary is already offline.
5748 if self.op.ignore_consistency or primary_node.offline:
5749 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5750 " Proceeding anyway. Please make sure node"
5751 " %s is down. Error details: %s",
5752 instance.name, source_node, source_node, msg)
5754 raise errors.OpExecError("Could not shutdown instance %s on"
5756 (instance.name, source_node, msg))
5758 feedback_fn("* deactivating the instance's disks on source node")
5759 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5760 raise errors.OpExecError("Can't shut down the instance's disks.")
# Point of no return: swap the primary node in the config.
5762 instance.primary_node = target_node
5763 # distribute new instance config to the other nodes
5764 self.cfg.Update(instance, feedback_fn)
5766 # Only start the instance if it's marked as up
5767 if instance.admin_up:
5768 feedback_fn("* activating the instance's disks on target node")
5769 logging.info("Starting instance %s on node %s",
5770 instance.name, target_node)
5772 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5773 ignore_secondaries=True)
# The `if not disks_ok:` guard (orig 5774) is elided.
5775 _ShutdownInstanceDisks(self, instance)
5776 raise errors.OpExecError("Can't activate the instance's disks")
5778 feedback_fn("* starting the instance on the target node")
5779 result = self.rpc.call_instance_start(target_node, instance, None, None)
5780 msg = result.fail_msg
5782 _ShutdownInstanceDisks(self, instance)
5783 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5784 (instance.name, target_node, msg))
# LU wrapping a single-instance live (or non-live) migration.  The actual
# work is delegated to a TLMigrateInstance tasklet; this LU only handles
# locking and the hook environment.
# NOTE(review): elided extract -- _OP_PARAMS entries other than "cleanup",
# the CheckPrereq/Exec methods (if any) and some docstring lines are not
# visible here.
5787 class LUMigrateInstance(LogicalUnit):
5788 """Migrate an instance.
5790 This is migration without shutting down, compared to the failover,
5791 which is done with shutdown.
5794 HPATH = "instance-migrate"
5795 HTYPE = constants.HTYPE_INSTANCE
# Opcode parameter: run the post-failed-migration cleanup path instead of a
# fresh migration.
5800 ("cleanup", False, ht.TBool),
5805 def ExpandNames(self):
5806 self._ExpandAndLockInstance()
5808 self.needed_locks[locking.LEVEL_NODE] = []
5809 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# The tasklet framework will run CheckPrereq/Exec on _migrater for us.
5811 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5813 self.tasklets = [self._migrater]
5815 def DeclareLocks(self, level):
5816 if level == locking.LEVEL_NODE:
5817 self._LockInstancesNodes()
# Hook env mirrors LUFailoverInstance's role-swap variables, plus the
# migration mode chosen by the tasklet (live/non-live).
5819 def BuildHooksEnv(self):
5822 This runs on master, primary and secondary nodes of the instance.
5825 instance = self._migrater.instance
5826 source_node = instance.primary_node
5827 target_node = instance.secondary_nodes[0]
5828 env = _BuildInstanceHookEnvByObject(self, instance)
5829 env["MIGRATE_LIVE"] = self._migrater.live
5830 env["MIGRATE_CLEANUP"] = self.op.cleanup
5832 "OLD_PRIMARY": source_node,
5833 "OLD_SECONDARY": target_node,
5834 "NEW_PRIMARY": target_node,
5835 "NEW_SECONDARY": source_node,
5837 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5839 nl_post.append(source_node)
5840 return env, nl, nl_post
# LU that moves an instance to an arbitrary node by shutting it down,
# creating fresh disks on the target and copying the data over (works for
# plain-LVM/file disk templates only, unlike failover/migrate).
# NOTE(review): elided extract -- docstring bodies, part of _OP_PARAMS and
# several guard lines (`if msg:`, `if not disks_ok:`, `if errs:`) are not
# visible; embedded line numbers show the gaps.
5843 class LUMoveInstance(LogicalUnit):
5844 """Move an instance by data-copying.
5847 HPATH = "instance-move"
5848 HTYPE = constants.HTYPE_INSTANCE
# Opcode parameter: the destination node (required).
5851 ("target_node", ht.NoDefault, ht.TNonEmptyString),
5856 def ExpandNames(self):
5857 self._ExpandAndLockInstance()
5858 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5859 self.op.target_node = target_node
# Lock the target node now; the instance's own primary is appended later.
5860 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5861 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5863 def DeclareLocks(self, level):
5864 if level == locking.LEVEL_NODE:
5865 self._LockInstancesNodes(primary_only=True)
5867 def BuildHooksEnv(self):
5870 This runs on master, primary and secondary nodes of the instance.
5874 "TARGET_NODE": self.op.target_node,
5875 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5877 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5878 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5879 self.op.target_node]
5882 def CheckPrereq(self):
5883 """Check prerequisites.
5885 This checks that the instance is in the cluster.
5888 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5889 assert self.instance is not None, \
5890 "Cannot retrieve locked instance %s" % self.op.instance_name
5892 node = self.cfg.GetNodeInfo(self.op.target_node)
5893 assert node is not None, \
5894 "Cannot retrieve locked node %s" % self.op.target_node
5896 self.target_node = target_node = node.name
5898 if target_node == instance.primary_node:
5899 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5900 (instance.name, target_node),
5903 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Only simple device types (LVM logical volume, file) can be dd-copied.
5905 for idx, dsk in enumerate(instance.disks):
5906 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5907 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5908 " cannot copy" % idx, errors.ECODE_STATE)
5910 _CheckNodeOnline(self, target_node)
5911 _CheckNodeNotDrained(self, target_node)
5912 _CheckNodeVmCapable(self, target_node)
5914 if instance.admin_up:
5915 # check memory requirements on the secondary node
5916 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5917 instance.name, bep[constants.BE_MEMORY],
5918 instance.hypervisor)
5920 self.LogInfo("Not checking memory on the secondary node as"
5921 " instance will not be started")
5923 # check bridge existance
5924 _CheckInstanceBridgesExist(self, instance, node=target_node)
5926 def Exec(self, feedback_fn):
5927 """Move an instance.
5929 The move is done by shutting it down on its present node, copying
5930 the data over (slow) and starting it on the new node.
5933 instance = self.instance
5935 source_node = instance.primary_node
5936 target_node = self.target_node
5938 self.LogInfo("Shutting down instance %s on source node %s",
5939 instance.name, source_node)
5941 result = self.rpc.call_instance_shutdown(source_node, instance,
5942 self.op.shutdown_timeout)
5943 msg = result.fail_msg
# NOTE(review): the `if msg:` guard (orig 5944) and the `else:` before the
# raise (orig 5950) are elided here.
5945 if self.op.ignore_consistency:
5946 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5947 " Proceeding anyway. Please make sure node"
5948 " %s is down. Error details: %s",
5949 instance.name, source_node, source_node, msg)
5951 raise errors.OpExecError("Could not shutdown instance %s on"
5953 (instance.name, source_node, msg))
5955 # create the target disks
# On creation failure, revert: remove whatever was created, release any
# reserved DRBD minors, then re-raise (the try:/raise lines are elided).
5957 _CreateDisks(self, instance, target_node=target_node)
5958 except errors.OpExecError:
5959 self.LogWarning("Device creation failed, reverting...")
5961 _RemoveDisks(self, instance, target_node=target_node)
5963 self.cfg.ReleaseDRBDMinors(instance.name)
5966 cluster_name = self.cfg.GetClusterInfo().cluster_name
5969 # activate, get path, copy the data over
5970 for idx, disk in enumerate(instance.disks):
5971 self.LogInfo("Copying data for disk %d", idx)
5972 result = self.rpc.call_blockdev_assemble(target_node, disk,
5973 instance.name, True)
# Failure checks around assemble/export (orig 5974, 5978, 5982-5983) are
# elided; errors are accumulated in `errs` rather than aborting mid-loop.
5975 self.LogWarning("Can't assemble newly created disk %d: %s",
5976 idx, result.fail_msg)
5977 errs.append(result.fail_msg)
5979 dev_path = result.payload
5980 result = self.rpc.call_blockdev_export(source_node, disk,
5981 target_node, dev_path,
5984 self.LogWarning("Can't copy data over for disk %d: %s",
5985 idx, result.fail_msg)
5986 errs.append(result.fail_msg)
# If any disk failed, remove the target disks and abort.
5990 self.LogWarning("Some disks failed to copy, aborting")
5992 _RemoveDisks(self, instance, target_node=target_node)
5994 self.cfg.ReleaseDRBDMinors(instance.name)
5995 raise errors.OpExecError("Errors during disk copy: %s" %
# Commit: the instance now lives on the target node.
5998 instance.primary_node = target_node
5999 self.cfg.Update(instance, feedback_fn)
6001 self.LogInfo("Removing the disks on the original node")
6002 _RemoveDisks(self, instance, target_node=source_node)
6004 # Only start the instance if it's marked as up
6005 if instance.admin_up:
6006 self.LogInfo("Starting instance %s on node %s",
6007 instance.name, target_node)
6009 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6010 ignore_secondaries=True)
# The `if not disks_ok:` guard (orig 6011) is elided.
6012 _ShutdownInstanceDisks(self, instance)
6013 raise errors.OpExecError("Can't activate the instance's disks")
6015 result = self.rpc.call_instance_start(target_node, instance, None, None)
6016 msg = result.fail_msg
6018 _ShutdownInstanceDisks(self, instance)
6019 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6020 (instance.name, target_node, msg))
# LU that migrates every primary instance off one node by building one
# TLMigrateInstance tasklet per instance.
# NOTE(review): elided extract -- _OP_PARAMS, docstring bodies and the
# `names`/`tasklets` list initialization lines are not visible.
6023 class LUMigrateNode(LogicalUnit):
6024 """Migrate all instances from a node.
6027 HPATH = "node-migrate"
6028 HTYPE = constants.HTYPE_NODE
6036 def ExpandNames(self):
6037 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6039 self.needed_locks = {
6040 locking.LEVEL_NODE: [self.op.node_name],
6043 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6045 # Create tasklets for migrating instances for all instances on this node
# The initialization of `names` and `tasklets` (orig 6046-6048) is elided.
6049 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6050 logging.debug("Migrating instance %s", inst.name)
6051 names.append(inst.name)
# cleanup=False: a fresh migration, not post-failure cleanup.
6053 tasklets.append(TLMigrateInstance(self, inst.name, False))
6055 self.tasklets = tasklets
6057 # Declare instance locks
6058 self.needed_locks[locking.LEVEL_INSTANCE] = names
6060 def DeclareLocks(self, level):
6061 if level == locking.LEVEL_NODE:
6062 self._LockInstancesNodes()
6064 def BuildHooksEnv(self):
6067 This runs on the master, the primary and all the secondaries.
6071 "NODE_NAME": self.op.node_name,
6074 nl = [self.cfg.GetMasterNode()]
6076 return (env, nl, nl)
6079 class TLMigrateInstance(Tasklet):
6080 """Tasklet class for instance migration.
6083 @ivar live: whether the migration will be done live or non-live;
6084 this variable is initalized only after CheckPrereq has run
6087 def __init__(self, lu, instance_name, cleanup):
6088 """Initializes this class.
6091 Tasklet.__init__(self, lu)
6094 self.instance_name = instance_name
6095 self.cleanup = cleanup
6096 self.live = False # will be overridden later
6098 def CheckPrereq(self):
6099 """Check prerequisites.
6101 This checks that the instance is in the cluster.
6104 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6105 instance = self.cfg.GetInstanceInfo(instance_name)
6106 assert instance is not None
6108 if instance.disk_template != constants.DT_DRBD8:
6109 raise errors.OpPrereqError("Instance's disk layout is not"
6110 " drbd8, cannot migrate.", errors.ECODE_STATE)
6112 secondary_nodes = instance.secondary_nodes
6113 if not secondary_nodes:
6114 raise errors.ConfigurationError("No secondary node but using"
6115 " drbd8 disk template")
6117 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6119 target_node = secondary_nodes[0]
6120 # check memory requirements on the secondary node
6121 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6122 instance.name, i_be[constants.BE_MEMORY],
6123 instance.hypervisor)
6125 # check bridge existance
6126 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6128 if not self.cleanup:
6129 _CheckNodeNotDrained(self.lu, target_node)
6130 result = self.rpc.call_instance_migratable(instance.primary_node,
6132 result.Raise("Can't migrate, please use failover",
6133 prereq=True, ecode=errors.ECODE_STATE)
6135 self.instance = instance
6137 if self.lu.op.live is not None and self.lu.op.mode is not None:
6138 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6139 " parameters are accepted",
6141 if self.lu.op.live is not None:
6143 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6145 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6146 # reset the 'live' parameter to None so that repeated
6147 # invocations of CheckPrereq do not raise an exception
6148 self.lu.op.live = None
6149 elif self.lu.op.mode is None:
6150 # read the default value from the hypervisor
6151 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6152 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6154 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6156 def _WaitUntilSync(self):
6157 """Poll with custom rpc for disk sync.
6159 This uses our own step-based rpc call.
6162 self.feedback_fn("* wait until resync is done")
6166 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6168 self.instance.disks)
6170 for node, nres in result.items():
6171 nres.Raise("Cannot resync disks on node %s" % node)
6172 node_done, node_percent = nres.payload
6173 all_done = all_done and node_done
6174 if node_percent is not None:
6175 min_percent = min(min_percent, node_percent)
6177 if min_percent < 100:
6178 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6181 def _EnsureSecondary(self, node):
6182 """Demote a node to secondary.
6185 self.feedback_fn("* switching node %s to secondary mode" % node)
6187 for dev in self.instance.disks:
6188 self.cfg.SetDiskID(dev, node)
6190 result = self.rpc.call_blockdev_close(node, self.instance.name,
6191 self.instance.disks)
6192 result.Raise("Cannot change disk to secondary on node %s" % node)
6194 def _GoStandalone(self):
6195 """Disconnect from the network.
6198 self.feedback_fn("* changing into standalone mode")
6199 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6200 self.instance.disks)
6201 for node, nres in result.items():
6202 nres.Raise("Cannot disconnect disks node %s" % node)
6204 def _GoReconnect(self, multimaster):
6205 """Reconnect to the network.
6211 msg = "single-master"
6212 self.feedback_fn("* changing disks into %s mode" % msg)
6213 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6214 self.instance.disks,
6215 self.instance.name, multimaster)
6216 for node, nres in result.items():
6217 nres.Raise("Cannot change disks config on node %s" % node)
6219 def _ExecCleanup(self):
6220 """Try to cleanup after a failed migration.
6222 The cleanup is done by:
6223 - check that the instance is running only on one node
6224 (and update the config if needed)
6225 - change disks on its secondary node to secondary
6226 - wait until disks are fully synchronized
6227 - disconnect from the network
6228 - change disks into single-master mode
6229 - wait again until disks are fully synchronized
6232 instance = self.instance
6233 target_node = self.target_node
6234 source_node = self.source_node
6236 # check running on only one node
6237 self.feedback_fn("* checking where the instance actually runs"
6238 " (if this hangs, the hypervisor might be in"
6240 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6241 for node, result in ins_l.items():
6242 result.Raise("Can't contact node %s" % node)
6244 runningon_source = instance.name in ins_l[source_node].payload
6245 runningon_target = instance.name in ins_l[target_node].payload
6247 if runningon_source and runningon_target:
6248 raise errors.OpExecError("Instance seems to be running on two nodes,"
6249 " or the hypervisor is confused. You will have"
6250 " to ensure manually that it runs only on one"
6251 " and restart this operation.")
6253 if not (runningon_source or runningon_target):
6254 raise errors.OpExecError("Instance does not seem to be running at all."
6255 " In this case, it's safer to repair by"
6256 " running 'gnt-instance stop' to ensure disk"
6257 " shutdown, and then restarting it.")
6259 if runningon_target:
6260 # the migration has actually succeeded, we need to update the config
6261 self.feedback_fn("* instance running on secondary node (%s),"
6262 " updating config" % target_node)
6263 instance.primary_node = target_node
6264 self.cfg.Update(instance, self.feedback_fn)
6265 demoted_node = source_node
6267 self.feedback_fn("* instance confirmed to be running on its"
6268 " primary node (%s)" % source_node)
6269 demoted_node = target_node
6271 self._EnsureSecondary(demoted_node)
6273 self._WaitUntilSync()
6274 except errors.OpExecError:
6275 # we ignore here errors, since if the device is standalone, it
6276 # won't be able to sync
6278 self._GoStandalone()
6279 self._GoReconnect(False)
6280 self._WaitUntilSync()
6282 self.feedback_fn("* done")
6284 def _RevertDiskStatus(self):
6285 """Try to revert the disk status after a failed migration.
6288 target_node = self.target_node
6290 self._EnsureSecondary(target_node)
6291 self._GoStandalone()
6292 self._GoReconnect(False)
6293 self._WaitUntilSync()
6294 except errors.OpExecError, err:
6295 self.lu.LogWarning("Migration failed and I can't reconnect the"
6296 " drives: error '%s'\n"
6297 "Please look and recover the instance status" %
6300 def _AbortMigration(self):
6301 """Call the hypervisor code to abort a started migration.
6304 instance = self.instance
6305 target_node = self.target_node
6306 migration_info = self.migration_info
6308 abort_result = self.rpc.call_finalize_migration(target_node,
6312 abort_msg = abort_result.fail_msg
6314 logging.error("Aborting migration failed on target node %s: %s",
6315 target_node, abort_msg)
6316 # Don't raise an exception here, as we stil have to try to revert the
6317 # disk status, even if this step failed.
6319 def _ExecMigration(self):
6320 """Migrate an instance.
6322 The migrate is done by:
6323 - change the disks into dual-master mode
6324 - wait until disks are fully synchronized again
6325 - migrate the instance
6326 - change disks on the new secondary node (the old primary) to secondary
6327 - wait until disks are fully synchronized
6328 - change disks into single-master mode
6331 instance = self.instance
6332 target_node = self.target_node
6333 source_node = self.source_node
6335 self.feedback_fn("* checking disk consistency between source and target")
6336 for dev in instance.disks:
6337 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6338 raise errors.OpExecError("Disk %s is degraded or not fully"
6339 " synchronized on target node,"
6340 " aborting migrate." % dev.iv_name)
6342 # First get the migration information from the remote node
6343 result = self.rpc.call_migration_info(source_node, instance)
6344 msg = result.fail_msg
6346 log_err = ("Failed fetching source migration information from %s: %s" %
6348 logging.error(log_err)
6349 raise errors.OpExecError(log_err)
6351 self.migration_info = migration_info = result.payload
6353 # Then switch the disks to master/master mode
6354 self._EnsureSecondary(target_node)
6355 self._GoStandalone()
6356 self._GoReconnect(True)
6357 self._WaitUntilSync()
6359 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6360 result = self.rpc.call_accept_instance(target_node,
6363 self.nodes_ip[target_node])
6365 msg = result.fail_msg
6367 logging.error("Instance pre-migration failed, trying to revert"
6368 " disk status: %s", msg)
6369 self.feedback_fn("Pre-migration failed, aborting")
6370 self._AbortMigration()
6371 self._RevertDiskStatus()
6372 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6373 (instance.name, msg))
6375 self.feedback_fn("* migrating instance to %s" % target_node)
6377 result = self.rpc.call_instance_migrate(source_node, instance,
6378 self.nodes_ip[target_node],
6380 msg = result.fail_msg
6382 logging.error("Instance migration failed, trying to revert"
6383 " disk status: %s", msg)
6384 self.feedback_fn("Migration failed, aborting")
6385 self._AbortMigration()
6386 self._RevertDiskStatus()
6387 raise errors.OpExecError("Could not migrate instance %s: %s" %
6388 (instance.name, msg))
6391 instance.primary_node = target_node
6392 # distribute new instance config to the other nodes
6393 self.cfg.Update(instance, self.feedback_fn)
6395 result = self.rpc.call_finalize_migration(target_node,
6399 msg = result.fail_msg
6401 logging.error("Instance migration succeeded, but finalization failed:"
6403 raise errors.OpExecError("Could not finalize instance migration: %s" %
6406 self._EnsureSecondary(source_node)
6407 self._WaitUntilSync()
6408 self._GoStandalone()
6409 self._GoReconnect(False)
6410 self._WaitUntilSync()
6412 self.feedback_fn("* done")
  def Exec(self, feedback_fn):
    """Perform the migration.

    Records the source/target nodes and their secondary (replication)
    IPs, then dispatches either to the cleanup path or to the actual
    migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    # keep the callback around so the helper methods can report progress
    self.feedback_fn = feedback_fn

    # DRBD-based instances have exactly one secondary, which is the
    # migration target
    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
      # per-node secondary (replication network) IP addresses
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      return self._ExecCleanup()
    return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():  # see force_create in the docstring above
  # recurse first so that child devices exist before the parent is created
  for child in device.children:
    _CreateBlockDev(lu, node, instance, child, force_create,
  if not force_create:
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6478 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6479 """Create a single block device on a given node.
6481 This will not recurse over children of the device, so they must be
6484 @param lu: the lu on whose behalf we execute
6485 @param node: the node on which to create the device
6486 @type instance: L{objects.Instance}
6487 @param instance: the instance which owns the device
6488 @type device: L{objects.Disk}
6489 @param device: the device to create
6490 @param info: the extra 'metadata' we should attach to the device
6491 (this will be represented as a LVM tag)
6492 @type force_open: boolean
6493 @param force_open: this parameter will be passes to the
6494 L{backend.BlockdevCreate} function where it specifies
6495 whether we run on primary or not, and it affects both
6496 the child assembly and the device own Open() execution
6499 lu.cfg.SetDiskID(device, node)
6500 result = lu.rpc.call_blockdev_create(node, device, device.size,
6501 instance.name, force_open, info)
6502 result.Raise("Can't create block device %s on"
6503 " node %s for instance %s" % (device, node, instance.name))
6504 if device.physical_id is None:
6505 device.physical_id = result.payload
6508 def _GenerateUniqueNames(lu, exts):
6509 """Generate a suitable LV name.
6511 This will generate a logical volume name for the given instance.
6516 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6517 results.append("%s%s" % (new_id, val))
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
  """Generate a drbd8 device complete with its children.

  Builds the data and metadata LVs (using names[0] and names[1]) and
  wraps them into a DRBD8 disk object connecting C{primary} and
  C{secondary}.

  """
  port = lu.cfg.AllocatePort()  # network port for the DRBD pair
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  # the data LV carries the full disk size; the metadata LV is 128 MB
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                          children=[dev_data, dev_meta],
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
  """Generate the entire disk layout for a given template type.

  Returns the list of L{objects.Disk} objects describing all volumes
  for the requested template (diskless/plain/drbd8/file).

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  if template_name == constants.DT_DISKLESS:
  elif template_name == constants.DT_PLAIN:
    # plain LVs live only on the primary node
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    # DRBD mirrors to exactly one secondary; each disk needs one minor
    # per node, hence the "* len(disk_info)" pairs below
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # each disk gets a data LV and a metadata LV, interleaved in "names"
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    # file-based disks also live only on the primary node
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
      disks.append(disk_dev)
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6610 def _GetInstanceInfoText(instance):
6611 """Compute that text that should be added to the disk's metadata.
6614 return "originstname+%s" % instance.name
6617 def _CalcEta(time_taken, written, total_size):
6618 """Calculates the ETA based on size written and total size.
6620 @param time_taken: The time taken so far
6621 @param written: amount written so far
6622 @param total_size: The total size of data to be written
6623 @return: The remaining time in seconds
6626 avg_time = time_taken / float(written)
6627 return (total_size - written) * avg_time
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node
  for idx, device in enumerate(instance.disks):
    lu.LogInfo("* Wiping disk %d", idx)
    logging.info("Wiping disk %d for instance %s", idx, instance.name)

    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
    # MAX_WIPE_CHUNK at max
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                          constants.MIN_WIPE_CHUNK_PERCENT)

    start_time = time.time()
    while offset < size:
      # last chunk may be smaller than the configured chunk size
      wipe_size = min(wipe_chunk_size, size - offset)
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
                   (idx, offset, wipe_size))
      # report progress (with an ETA estimate) at most once per minute
      if now - last_output >= 60:
        eta = _CalcEta(now - start_time, offset, size)
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  # file-based disks additionally need their storage directory created
  # on the (single) target node
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      # creation (and opening) is forced only on the primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  for device in instance.disks:
      # removal restricted to the given node only
      edata = [(target_node, device)]
      # remove from every node in the device's node tree
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      # best effort: warn and keep going with the remaining devices
      lu.LogWarning("Could not remove block device %s on node %s,"
                    " continuing anyway: %s", device.iv_name, node, msg)

  # file-based instances also need their storage directory removed
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the instance's disk template
  @param disks: list of disk dicts, each with a "size" entry (in MB)
  @return: the required space in the volume group, or None for templates
      which do not consume volume group space
  @raise errors.ProgrammerError: for an unknown disk template

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
    }
  try:
    return req_size_dict[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  # ask all the nodes to validate the parameters in one RPC round-trip
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
  for node in nodenames:
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      # empty payload means the OS was not found on that node
      lu.LogInfo("OS %s not found on node %s, validation skipped",
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  Supports plain creation, import from a local export, and remote
  (cross-cluster) import, selected via the "mode" opcode parameter.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  # opcode parameter declarations: (name, default, type check);
  # ht.NoDefault marks mandatory parameters
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
    ("start", True, ht.TBool),
    ("wait_for_sync", True, ht.TBool),
    ("ip_check", True, ht.TBool),
    ("name_check", True, ht.TBool),
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ("osparams", ht.EmptyDict, ht.TDict),
    ("no_install", None, ht.TMaybeBool),
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    # parameters for mode == INSTANCE_REMOTE_IMPORT
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
    ("source_x509_ca", None, ht.TMaybeString),
    ("source_instance_name", None, ht.TMaybeString),
    ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
    # parameters for mode == INSTANCE_IMPORT
    ("src_node", None, ht.TMaybeString),
    ("src_path", None, ht.TMaybeString),
    ("pnode", None, ht.TMaybeString),
    ("snode", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("hypervisor", None, ht.TMaybeString),
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
    ("identify_defaults", False, ht.TBool),
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
    ("file_storage_dir", None, ht.TMaybeString),
  def CheckArguments(self):
    """Check arguments.

    Validates and normalizes the opcode parameters, including the
    per-creation-mode (create/import/remote-import) requirements.

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
      # disk adoption is restricted: only some templates, no iallocator,
      # and not on import
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        # mirrored templates need an explicit secondary node
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",

        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # the allocator may place the instance on any node, so lock them all
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        # default the export path to the instance name
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # unknown source node: lock everything and search later
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          # relative paths are anchored in the cluster export directory
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7076 def _RunAllocator(self):
7077 """Run the allocator based on input opcode.
7080 nics = [n.ToDict() for n in self.nics]
7081 ial = IAllocator(self.cfg, self.rpc,
7082 mode=constants.IALLOCATOR_MODE_ALLOC,
7083 name=self.op.instance_name,
7084 disk_template=self.op.disk_template,
7087 vcpus=self.be_full[constants.BE_VCPUS],
7088 mem_size=self.be_full[constants.BE_MEMORY],
7091 hypervisor=self.op.hypervisor,
7094 ial.Run(self.op.iallocator)
7097 raise errors.OpPrereqError("Can't compute nodes using"
7098 " iallocator '%s': %s" %
7099 (self.op.iallocator, ial.info),
7101 if len(ial.result) != ial.required_nodes:
7102 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7103 " of nodes (%s), required %s" %
7104 (self.op.iallocator, len(ial.result),
7105 ial.required_nodes), errors.ECODE_FAULT)
7106 self.op.pnode = ial.result[0]
7107 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7108 self.op.instance_name, self.op.iallocator,
7109 utils.CommaJoin(ial.result))
7110 if ial.required_nodes == 2:
7111 self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
      "ADD_MODE": self.op.mode,
    # import-specific environment entries
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      hypervisor_name=self.op.hypervisor,

    # hooks node list: the master plus all instance nodes
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      # no source node given: search all locked nodes for the export
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)

      for node in exp_list:
        if exp_list[node].fail_msg:
        if src_path in exp_list[node].payload:
          # found it; fix up the opcode with the discovered location
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    # refuse exports written by an incompatible Ganeti version
    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    available.

    @param einfo: the export information (config-parser style object,
        as returned by L{_ReadExportInfo})

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      # rebuild nic dicts from the per-index export entries
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    Parameters whose value equals the cluster-provided default are
    removed from the opcode, leaving only the explicit overrides.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
7291 def CheckPrereq(self):
7292 """Check prerequisites.
7295 if self.op.mode == constants.INSTANCE_IMPORT:
7296 export_info = self._ReadExportInfo()
7297 self._ReadExportParams(export_info)
7299 _CheckDiskTemplate(self.op.disk_template)
7301 if (not self.cfg.GetVGName() and
7302 self.op.disk_template not in constants.DTS_NOT_LVM):
7303 raise errors.OpPrereqError("Cluster does not support lvm-based"
7304 " instances", errors.ECODE_STATE)
7306 if self.op.hypervisor is None:
7307 self.op.hypervisor = self.cfg.GetHypervisorType()
7309 cluster = self.cfg.GetClusterInfo()
7310 enabled_hvs = cluster.enabled_hypervisors
7311 if self.op.hypervisor not in enabled_hvs:
7312 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7313 " cluster (%s)" % (self.op.hypervisor,
7314 ",".join(enabled_hvs)),
7317 # check hypervisor parameter syntax (locally)
7318 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7319 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7321 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7322 hv_type.CheckParameterSyntax(filled_hvp)
7323 self.hv_full = filled_hvp
7324 # check that we don't specify global parameters on an instance
7325 _CheckGlobalHvParams(self.op.hvparams)
7327 # fill and remember the beparams dict
7328 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7329 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7331 # build os parameters
7332 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7334 # now that hvp/bep are in final format, let's reset to defaults,
7336 if self.op.identify_defaults:
7337 self._RevertToDefaults(cluster)
7341 for idx, nic in enumerate(self.op.nics):
7342 nic_mode_req = nic.get("mode", None)
7343 nic_mode = nic_mode_req
7344 if nic_mode is None:
7345 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7347 # in routed mode, for the first nic, the default ip is 'auto'
7348 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7349 default_ip_mode = constants.VALUE_AUTO
7351 default_ip_mode = constants.VALUE_NONE
7353 # ip validity checks
7354 ip = nic.get("ip", default_ip_mode)
7355 if ip is None or ip.lower() == constants.VALUE_NONE:
7357 elif ip.lower() == constants.VALUE_AUTO:
7358 if not self.op.name_check:
7359 raise errors.OpPrereqError("IP address set to auto but name checks"
7360 " have been skipped",
7362 nic_ip = self.hostname1.ip
7364 if not netutils.IPAddress.IsValid(ip):
7365 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7369 # TODO: check the ip address for uniqueness
7370 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7371 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7374 # MAC address verification
7375 mac = nic.get("mac", constants.VALUE_AUTO)
7376 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7377 mac = utils.NormalizeAndValidateMac(mac)
7380 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7381 except errors.ReservationError:
7382 raise errors.OpPrereqError("MAC address %s already in use"
7383 " in cluster" % mac,
7384 errors.ECODE_NOTUNIQUE)
7386 # bridge verification
7387 bridge = nic.get("bridge", None)
7388 link = nic.get("link", None)
7390 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7391 " at the same time", errors.ECODE_INVAL)
7392 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7393 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7400 nicparams[constants.NIC_MODE] = nic_mode_req
7402 nicparams[constants.NIC_LINK] = link
7404 check_params = cluster.SimpleFillNIC(nicparams)
7405 objects.NIC.CheckParameterSyntax(check_params)
7406 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7408 # disk checks/pre-build
7410 for disk in self.op.disks:
7411 mode = disk.get("mode", constants.DISK_RDWR)
7412 if mode not in constants.DISK_ACCESS_SET:
7413 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7414 mode, errors.ECODE_INVAL)
7415 size = disk.get("size", None)
7417 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7420 except (TypeError, ValueError):
7421 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7423 new_disk = {"size": size, "mode": mode}
7425 new_disk["adopt"] = disk["adopt"]
7426 self.disks.append(new_disk)
7428 if self.op.mode == constants.INSTANCE_IMPORT:
7430 # Check that the new instance doesn't have less disks than the export
7431 instance_disks = len(self.disks)
7432 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7433 if instance_disks < export_disks:
7434 raise errors.OpPrereqError("Not enough disks to import."
7435 " (instance: %d, export: %d)" %
7436 (instance_disks, export_disks),
7440 for idx in range(export_disks):
7441 option = 'disk%d_dump' % idx
7442 if export_info.has_option(constants.INISECT_INS, option):
7443 # FIXME: are the old os-es, disk sizes, etc. useful?
7444 export_name = export_info.get(constants.INISECT_INS, option)
7445 image = utils.PathJoin(self.op.src_path, export_name)
7446 disk_images.append(image)
7448 disk_images.append(False)
7450 self.src_images = disk_images
7452 old_name = export_info.get(constants.INISECT_INS, 'name')
7454 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7455 except (TypeError, ValueError), err:
7456 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7457 " an integer: %s" % str(err),
7459 if self.op.instance_name == old_name:
7460 for idx, nic in enumerate(self.nics):
7461 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7462 nic_mac_ini = 'nic%d_mac' % idx
7463 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7465 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7467 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7468 if self.op.ip_check:
7469 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7470 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7471 (self.check_ip, self.op.instance_name),
7472 errors.ECODE_NOTUNIQUE)
7474 #### mac address generation
7475 # By generating here the mac address both the allocator and the hooks get
7476 # the real final mac address rather than the 'auto' or 'generate' value.
7477 # There is a race condition between the generation and the instance object
7478 # creation, which means that we know the mac is valid now, but we're not
7479 # sure it will be when we actually add the instance. If things go bad
7480 # adding the instance will abort because of a duplicate mac, and the
7481 # creation job will fail.
7482 for nic in self.nics:
7483 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7484 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7488 if self.op.iallocator is not None:
7489 self._RunAllocator()
7491 #### node related checks
7493 # check primary node
7494 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7495 assert self.pnode is not None, \
7496 "Cannot retrieve locked node %s" % self.op.pnode
7498 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7499 pnode.name, errors.ECODE_STATE)
7501 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7502 pnode.name, errors.ECODE_STATE)
7503 if not pnode.vm_capable:
7504 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7505 " '%s'" % pnode.name, errors.ECODE_STATE)
7507 self.secondaries = []
7509 # mirror node verification
7510 if self.op.disk_template in constants.DTS_NET_MIRROR:
7511 if self.op.snode == pnode.name:
7512 raise errors.OpPrereqError("The secondary node cannot be the"
7513 " primary node.", errors.ECODE_INVAL)
7514 _CheckNodeOnline(self, self.op.snode)
7515 _CheckNodeNotDrained(self, self.op.snode)
7516 _CheckNodeVmCapable(self, self.op.snode)
7517 self.secondaries.append(self.op.snode)
7519 nodenames = [pnode.name] + self.secondaries
7521 req_size = _ComputeDiskSize(self.op.disk_template,
7524 # Check lv size requirements, if not adopting
7525 if req_size is not None and not self.adopt_disks:
7526 _CheckNodesFreeDisk(self, nodenames, req_size)
7528 if self.adopt_disks: # instead, we must check the adoption data
7529 all_lvs = set([i["adopt"] for i in self.disks])
7530 if len(all_lvs) != len(self.disks):
7531 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7533 for lv_name in all_lvs:
7535 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7536 except errors.ReservationError:
7537 raise errors.OpPrereqError("LV named %s used by another instance" %
7538 lv_name, errors.ECODE_NOTUNIQUE)
7540 node_lvs = self.rpc.call_lv_list([pnode.name],
7541 self.cfg.GetVGName())[pnode.name]
7542 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7543 node_lvs = node_lvs.payload
7544 delta = all_lvs.difference(node_lvs.keys())
7546 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7547 utils.CommaJoin(delta),
7549 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7551 raise errors.OpPrereqError("Online logical volumes found, cannot"
7552 " adopt: %s" % utils.CommaJoin(online_lvs),
7554 # update the size of disk based on what is found
7555 for dsk in self.disks:
7556 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7558 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7560 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7561 # check OS parameters (remotely)
7562 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7564 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7566 # memory check on primary node
7568 _CheckNodeFreeMemory(self, self.pnode.name,
7569 "creating instance %s" % self.op.instance_name,
7570 self.be_full[constants.BE_MEMORY],
7573 self.dry_run_result = list(nodenames)
7575 def Exec(self, feedback_fn):
7576 """Create and add the instance to the cluster.
# NOTE(review): this listing is missing interior lines throughout (the embedded
# original line numbers jump, e.g. 7577-7578, 7585-7587); read alongside the
# complete file before changing anything here.
7579 instance = self.op.instance_name
7580 pnode_name = self.pnode.name
# A network (console) port is allocated only for hypervisors listed in
# HTS_REQ_PORT; the else branch (original line ~7585) is missing here.
7582 ht_kind = self.op.hypervisor
7583 if ht_kind in constants.HTS_REQ_PORT:
7584 network_port = self.cfg.AllocatePort()
7588 if constants.ENABLE_FILE_STORAGE:
7589 # this is needed because os.path.join does not accept None arguments
7590 if self.op.file_storage_dir is None:
7591 string_file_storage_dir = ""
7593 string_file_storage_dir = self.op.file_storage_dir
7595 # build the full file storage dir path
7596 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7597 string_file_storage_dir, instance)
7599 file_storage_dir = ""
# Build the disk objects and the in-memory Instance object; the cluster
# config is only updated after the disks are created successfully below.
7601 disks = _GenerateDiskTemplate(self,
7602 self.op.disk_template,
7603 instance, pnode_name,
7607 self.op.file_driver,
7610 iobj = objects.Instance(name=instance, os=self.op.os_type,
7611 primary_node=pnode_name,
7612 nics=self.nics, disks=disks,
7613 disk_template=self.op.disk_template,
7615 network_port=network_port,
7616 beparams=self.op.beparams,
7617 hvparams=self.op.hvparams,
7618 hypervisor=self.op.hypervisor,
7619 osparams=self.op.osparams,
7622 if self.adopt_disks:
7623 # rename LVs to the newly-generated names; we need to construct
7624 # 'fake' LV disks with the old data, plus the new unique_id
7625 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7627 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7628 rename_to.append(t_dsk.logical_id)
7629 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7630 self.cfg.SetDiskID(t_dsk, pnode_name)
7631 result = self.rpc.call_blockdev_rename(pnode_name,
7632 zip(tmp_disks, rename_to))
# TODO(review): "adoped" below is a typo for "adopted" in a user-visible
# error message; fixing it is a behavior change, flagged only.
7633 result.Raise("Failed to rename adoped LVs")
7635 feedback_fn("* creating instance disks...")
# On a disk-creation failure: remove the partial disks, release the DRBD
# minors reserved for this instance, then re-raise (the try:/raise lines
# around this handler are among the missing lines).
7637 _CreateDisks(self, iobj)
7638 except errors.OpExecError:
7639 self.LogWarning("Device creation failed, reverting...")
7641 _RemoveDisks(self, iobj)
7643 self.cfg.ReleaseDRBDMinors(instance)
7646 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7647 feedback_fn("* wiping instance disks...")
7649 _WipeDisks(self, iobj)
7650 except errors.OpExecError:
7651 self.LogWarning("Device wiping failed, reverting...")
7653 _RemoveDisks(self, iobj)
7655 self.cfg.ReleaseDRBDMinors(instance)
7658 feedback_fn("adding instance %s to cluster config" % instance)
7660 self.cfg.AddInstance(iobj, self.proc.GetECId())
7662 # Declare that we don't want to remove the instance lock anymore, as we've
7663 # added the instance to the config
7664 del self.remove_locks[locking.LEVEL_INSTANCE]
7665 # Unlock all the nodes
7666 if self.op.mode == constants.INSTANCE_IMPORT:
# For imports, keep the lock on the source node (still needed for the
# disk transfer below) and release every other node lock.
7667 nodes_keep = [self.op.src_node]
7668 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7669 if node != self.op.src_node]
7670 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7671 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7673 self.context.glm.release(locking.LEVEL_NODE)
7674 del self.acquired_locks[locking.LEVEL_NODE]
7676 if self.op.wait_for_sync:
7677 disk_abort = not _WaitForSync(self, iobj)
7678 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7679 # make sure the disks are not degraded (still sync-ing is ok)
7681 feedback_fn("* checking mirrors status")
7682 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
# If the disks came up degraded: remove disks, drop the instance from the
# config, re-arm the instance-lock removal and abort (the guarding
# `if disk_abort:` line is among the missing lines).
7687 _RemoveDisks(self, iobj)
7688 self.cfg.RemoveInstance(iobj.name)
7689 # Make sure the instance lock gets removed
7690 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7691 raise errors.OpExecError("There are some degraded disks for"
# OS installation / data import; skipped for diskless or adopted disks.
7694 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7695 if self.op.mode == constants.INSTANCE_CREATE:
7696 if not self.op.no_install:
7697 feedback_fn("* running the instance OS create scripts...")
7698 # FIXME: pass debug option from opcode to backend
7699 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7700 self.op.debug_level)
7701 result.Raise("Could not add os for instance %s"
7702 " on node %s" % (instance, pnode_name))
7704 elif self.op.mode == constants.INSTANCE_IMPORT:
7705 feedback_fn("* running the instance OS import scripts...")
7709 for idx, image in enumerate(self.src_images):
7713 # FIXME: pass debug option from opcode to backend
7714 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7715 constants.IEIO_FILE, (image, ),
7716 constants.IEIO_SCRIPT,
7717 (iobj.disks[idx], idx),
7719 transfers.append(dt)
7722 masterd.instance.TransferInstanceData(self, feedback_fn,
7723 self.op.src_node, pnode_name,
7724 self.pnode.secondary_ip,
# Failed local imports only produce a warning; the instance still exists.
7726 if not compat.all(import_result):
7727 self.LogWarning("Some disks for instance %s on node %s were not"
7728 " imported successfully" % (instance, pnode_name))
7730 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7731 feedback_fn("* preparing remote import...")
7732 # The source cluster will stop the instance before attempting to make a
7733 # connection. In some cases stopping an instance can take a long time,
7734 # hence the shutdown timeout is added to the connection timeout.
7735 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7736 self.op.source_shutdown_timeout)
7737 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7739 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7740 self.source_x509_ca,
7741 self._cds, timeouts)
7742 if not compat.all(disk_results):
7743 # TODO: Should the instance still be started, even if some disks
7744 # failed to import (valid for local imports, too)?
7745 self.LogWarning("Some disks for instance %s on node %s were not"
7746 " imported successfully" % (instance, pnode_name))
7748 # Run rename script on newly imported instance
7749 assert iobj.name == instance
7750 feedback_fn("Running rename script for %s" % instance)
7751 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7752 self.source_instance_name,
7753 self.op.debug_level)
# A failed rename script is non-fatal: warn and continue.
7755 self.LogWarning("Failed to run rename script for %s on node"
7756 " %s: %s" % (instance, pnode_name, result.fail_msg))
7759 # also checked in the prereq part
7760 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
# Persist final state and start the instance (the guarding condition on
# original line ~7763, presumably `if self.op.start:`, is missing here
# -- TODO confirm against the full file).
7764 iobj.admin_up = True
7765 self.cfg.Update(iobj, feedback_fn)
7766 logging.info("Starting instance %s on node %s", instance, pnode_name)
7767 feedback_fn("* starting instance...")
7768 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7769 result.Raise("Could not start instance")
7771 return list(iobj.all_nodes)
7774 class LUConnectConsole(NoHooksLU):
7775 """Connect to an instance's console.
7777 This is somewhat special in that it returns the command line that
7778 you need to run on the master node in order to connect to the
# NOTE(review): docstring terminator and _OP_PARAMS (original lines
# ~7779-7786) are missing from this listing.
7787 def ExpandNames(self):
# Acquire the instance lock; node locks are not needed for a console cmd.
7788 self._ExpandAndLockInstance()
7790 def CheckPrereq(self):
7791 """Check prerequisites.
7793 This checks that the instance is in the cluster.
# Fetch the locked instance and make sure its primary node is online,
# since the console command must be executed there.
7796 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7797 assert self.instance is not None, \
7798 "Cannot retrieve locked instance %s" % self.op.instance_name
7799 _CheckNodeOnline(self, self.instance.primary_node)
7801 def Exec(self, feedback_fn):
7802 """Connect to the console of an instance
7805 instance = self.instance
7806 node = instance.primary_node
7808 node_insts = self.rpc.call_instance_list([node],
7809 [instance.hypervisor])[node]
7810 node_insts.Raise("Can't get node information from %s" % node)
# The instance must actually be running on its primary node; otherwise
# report whether it is down by accident (ERROR_down) or by admin request
# (ADMIN_down). The `else:` between the two assignments is missing here.
7812 if instance.name not in node_insts.payload:
7813 if instance.admin_up:
7814 state = "ERROR_down"
7816 state = "ADMIN_down"
7817 raise errors.OpExecError("Instance %s is not running (state %s)" %
7818 (instance.name, state))
7820 logging.debug("Connecting to console of %s on %s", instance.name, node)
7822 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7823 cluster = self.cfg.GetClusterInfo()
7824 # beparams and hvparams are passed separately, to avoid editing the
7825 # instance and then saving the defaults in the instance itself.
7826 hvparams = cluster.FillHV(instance)
7827 beparams = cluster.FillBE(instance)
7828 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
# Returned value is an ssh command line to be run on the master node.
7831 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7834 class LUReplaceDisks(LogicalUnit):
7835 """Replace the disks of an instance.
7838 HPATH = "mirrors-replace"
7839 HTYPE = constants.HTYPE_INSTANCE
# Opcode parameters: (name, default, type-check); the _OP_PARAMS opening
# line (original ~7840-7841) is missing from this listing.
7842 ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
7843 ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
7844 ("remote_node", None, ht.TMaybeString),
7845 ("iallocator", None, ht.TMaybeString),
7846 ("early_release", False, ht.TBool),
7850 def CheckArguments(self):
# Parameter-combination validation is shared with the tasklet.
7851 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7854 def ExpandNames(self):
7855 self._ExpandAndLockInstance()
# With an iallocator the new secondary is unknown yet, so all node locks
# are taken; with an explicit remote node only that node is pre-locked.
7857 if self.op.iallocator is not None:
7858 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7860 elif self.op.remote_node is not None:
7861 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7862 self.op.remote_node = remote_node
7864 # Warning: do not remove the locking of the new secondary here
7865 # unless DRBD8.AddChildren is changed to work in parallel;
7866 # currently it doesn't since parallel invocations of
7867 # FindUnusedMinor will conflict
7868 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7869 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7872 self.needed_locks[locking.LEVEL_NODE] = []
7873 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# The actual work is delegated to a TLReplaceDisks tasklet
# (delay_iallocator=False here).
7875 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7876 self.op.iallocator, self.op.remote_node,
7877 self.op.disks, False, self.op.early_release)
7879 self.tasklets = [self.replacer]
7881 def DeclareLocks(self, level):
7882 # If we're not already locking all nodes in the set we have to declare the
7883 # instance's primary/secondary nodes.
7884 if (level == locking.LEVEL_NODE and
7885 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7886 self._LockInstancesNodes()
7888 def BuildHooksEnv(self):
7891 This runs on the master, the primary and all the secondaries.
# Builds the hook environment and node list; the env dict opening and the
# final `return env, nl, nl`-style lines are missing from this listing.
7894 instance = self.replacer.instance
7896 "MODE": self.op.mode,
7897 "NEW_SECONDARY": self.op.remote_node,
7898 "OLD_SECONDARY": instance.secondary_nodes[0],
7900 env.update(_BuildInstanceHookEnvByObject(self, instance))
7902 self.cfg.GetMasterNode(),
7903 instance.primary_node,
7905 if self.op.remote_node is not None:
7906 nl.append(self.op.remote_node)
7910 class TLReplaceDisks(Tasklet):
7911 """Replaces disks for an instance.
7913 Note: Locking is not within the scope of this class.
7916 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7917 disks, delay_iallocator, early_release):
7918 """Initializes this class.
# Store the constructor arguments (some assignments, e.g. for mode/disks,
# are among the lines missing from this listing).
7921 Tasklet.__init__(self, lu)
7924 self.instance_name = instance_name
7926 self.iallocator_name = iallocator_name
7927 self.remote_node = remote_node
7929 self.delay_iallocator = delay_iallocator
7930 self.early_release = early_release
# Runtime data, filled in by CheckPrereq/_CheckPrereq2.
7933 self.instance = None
7934 self.new_node = None
7935 self.target_node = None
7936 self.other_node = None
7937 self.remote_node_info = None
7938 self.node_secondary_ip = None
7941 def CheckArguments(mode, remote_node, iallocator):
7942 """Helper function for users of this class.
# Validates the mode/remote_node/iallocator combination for disk
# replacement; raises OpPrereqError on invalid combinations. (Declared as
# a staticmethod in the full file; the decorator line is missing here.)
7945 # check for valid parameter combination
7946 if mode == constants.REPLACE_DISK_CHG:
# Changing the secondary requires exactly one of: iallocator, new node.
7947 if remote_node is None and iallocator is None:
7948 raise errors.OpPrereqError("When changing the secondary either an"
7949 " iallocator script must be used or the"
7950 " new node given", errors.ECODE_INVAL)
7952 if remote_node is not None and iallocator is not None:
7953 raise errors.OpPrereqError("Give either the iallocator or the new"
7954 " secondary, not both", errors.ECODE_INVAL)
7956 elif remote_node is not None or iallocator is not None:
7957 # Not replacing the secondary
7958 raise errors.OpPrereqError("The iallocator and new node options can"
7959 " only be used when changing the"
7960 " secondary node", errors.ECODE_INVAL)
7963 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7964 """Compute a new secondary node using an IAllocator.
# Runs the named iallocator in relocation mode and returns the selected
# node name; raises OpPrereqError if the allocator fails or returns an
# unexpected number of nodes.
7967 ial = IAllocator(lu.cfg, lu.rpc,
7968 mode=constants.IALLOCATOR_MODE_RELOC,
7970 relocate_from=relocate_from)
7972 ial.Run(iallocator_name)
# The success check (`if not ial.success:`) is among the missing lines.
7975 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7976 " %s" % (iallocator_name, ial.info),
7979 if len(ial.result) != ial.required_nodes:
7980 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7981 " of nodes (%s), required %s" %
7983 len(ial.result), ial.required_nodes),
7986 remote_node_name = ial.result[0]
7988 lu.LogInfo("Selected new secondary for instance '%s': %s",
7989 instance_name, remote_node_name)
7991 return remote_node_name
7993 def _FindFaultyDisks(self, node_name):
# Thin wrapper around the module-level helper; the continuation line with
# the remaining arguments (original ~7995) is missing from this listing.
7994 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7997 def CheckPrereq(self):
7998 """Check prerequisites.
8000 This checks that the instance is in the cluster.
8003 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8004 assert instance is not None, \
8005 "Cannot retrieve locked instance %s" % self.instance_name
# Disk replacement is only meaningful for DRBD8 with exactly one secondary.
8007 if instance.disk_template != constants.DT_DRBD8:
8008 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8009 " instances", errors.ECODE_INVAL)
8011 if len(instance.secondary_nodes) != 1:
8012 raise errors.OpPrereqError("The instance has a strange layout,"
8013 " expected one secondary but found %d" %
8014 len(instance.secondary_nodes),
# The second part of the checks may be deferred to Exec (node evacuation
# case) -- see _CheckPrereq2's docstring.
8017 if not self.delay_iallocator:
8018 self._CheckPrereq2()
8020 def _CheckPrereq2(self):
8021 """Check prerequisites, second part.
8023 This function should always be part of CheckPrereq. It was separated and is
8024 now called from Exec because during node evacuation iallocator was only
8025 called with an unmodified cluster model, not taking planned changes into
8029 instance = self.instance
8030 secondary_node = instance.secondary_nodes[0]
# Determine the replacement node: either the explicitly-given one or the
# one computed by the iallocator (the `else:` line is missing here).
8032 if self.iallocator_name is None:
8033 remote_node = self.remote_node
8035 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8036 instance.name, instance.secondary_nodes)
8038 if remote_node is not None:
8039 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8040 assert self.remote_node_info is not None, \
8041 "Cannot retrieve locked node %s" % remote_node
8043 self.remote_node_info = None
8045 if remote_node == self.instance.primary_node:
8046 raise errors.OpPrereqError("The specified node is the primary node of"
8047 " the instance.", errors.ECODE_INVAL)
8049 if remote_node == secondary_node:
8050 raise errors.OpPrereqError("The specified node is already the"
8051 " secondary node of the instance.",
# Explicit disk lists are only meaningful for PRI/SEC replacement modes.
8054 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8055 constants.REPLACE_DISK_CHG):
8056 raise errors.OpPrereqError("Cannot specify disks to be replaced",
# Automatic mode: find which side actually has faulty disks and target it.
8059 if self.mode == constants.REPLACE_DISK_AUTO:
8060 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8061 faulty_secondary = self._FindFaultyDisks(secondary_node)
8063 if faulty_primary and faulty_secondary:
8064 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8065 " one node and can not be repaired"
8066 " automatically" % self.instance_name,
8070 self.disks = faulty_primary
8071 self.target_node = instance.primary_node
8072 self.other_node = secondary_node
8073 check_nodes = [self.target_node, self.other_node]
8074 elif faulty_secondary:
8075 self.disks = faulty_secondary
8076 self.target_node = secondary_node
8077 self.other_node = instance.primary_node
8078 check_nodes = [self.target_node, self.other_node]
8084 # Non-automatic modes
8085 if self.mode == constants.REPLACE_DISK_PRI:
8086 self.target_node = instance.primary_node
8087 self.other_node = secondary_node
8088 check_nodes = [self.target_node, self.other_node]
8090 elif self.mode == constants.REPLACE_DISK_SEC:
8091 self.target_node = secondary_node
8092 self.other_node = instance.primary_node
8093 check_nodes = [self.target_node, self.other_node]
8095 elif self.mode == constants.REPLACE_DISK_CHG:
# Secondary change: the old secondary is the target, the primary stays.
8096 self.new_node = remote_node
8097 self.other_node = instance.primary_node
8098 self.target_node = secondary_node
8099 check_nodes = [self.new_node, self.other_node]
8101 _CheckNodeNotDrained(self.lu, remote_node)
8102 _CheckNodeVmCapable(self.lu, remote_node)
8104 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8105 assert old_node_info is not None
8106 if old_node_info.offline and not self.early_release:
8107 # doesn't make sense to delay the release
8108 self.early_release = True
8109 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8110 " early-release mode", secondary_node)
# Defensive `else:` branch for an unhandled mode (its header is missing).
8113 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8116 # If not specified all disks should be replaced
8118 self.disks = range(len(self.instance.disks))
8120 for node in check_nodes:
8121 _CheckNodeOnline(self.lu, node)
8123 # Check whether disks are valid
8124 for disk_idx in self.disks:
8125 instance.FindDisk(disk_idx)
8127 # Get secondary node IP addresses
# The node_2nd_ip dict initialization (original ~8128-8129) is missing.
8130 for node_name in [self.target_node, self.other_node, self.new_node]:
8131 if node_name is not None:
8132 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8134 self.node_secondary_ip = node_2nd_ip
8136 def Exec(self, feedback_fn):
8137 """Execute disk replacement.
8139 This dispatches the disk replacement to the appropriate handler.
# Run the deferred prerequisite checks if they were delayed (node
# evacuation case) -- see _CheckPrereq2.
8142 if self.delay_iallocator:
8143 self._CheckPrereq2()
8146 feedback_fn("No disks need replacement")
8149 feedback_fn("Replacing disk(s) %s for %s" %
8150 (utils.CommaJoin(self.disks), self.instance.name))
8152 activate_disks = (not self.instance.admin_up)
8154 # Activate the instance disks if we're replacing them on a down instance
8156 _StartInstanceDisks(self.lu, self.instance, True)
# Dispatch: secondary-node change vs in-place disk replacement; the
# try/else/finally scaffolding around this is among the missing lines.
8159 # Should we replace the secondary node?
8160 if self.new_node is not None:
8161 fn = self._ExecDrbd8Secondary
8163 fn = self._ExecDrbd8DiskOnly
8165 return fn(feedback_fn)
8168 # Deactivate the instance disks if we're replacing them on a
8171 _SafeShutdownInstanceDisks(self.lu, self.instance)
8173 def _CheckVolumeGroup(self, nodes):
# Verifies the cluster's volume group exists on every node in `nodes`;
# raises OpExecError otherwise.
8174 self.lu.LogInfo("Checking volume groups")
8176 vgname = self.cfg.GetVGName()
8178 # Make sure volume group exists on all involved nodes
8179 results = self.rpc.call_vg_list(nodes)
8181 raise errors.OpExecError("Can't list volume groups on the nodes")
# Per-node result loop (its `for node in nodes:` header is missing here).
8185 res.Raise("Error checking node %s" % node)
8186 if vgname not in res.payload:
8187 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8190 def _CheckDisksExistence(self, nodes):
8191 # Check disk existence
# Only the disks selected for replacement (self.disks holds indices) are
# checked; others are skipped via `continue` (missing from this listing).
8192 for idx, dev in enumerate(self.instance.disks):
8193 if idx not in self.disks:
8197 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8198 self.cfg.SetDiskID(dev, node)
8200 result = self.rpc.call_blockdev_find(node, dev)
8202 msg = result.fail_msg
8203 if msg or not result.payload:
8205 msg = "disk not found"
8206 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8209 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
# Ensures each disk selected for replacement is consistent on `node_name`;
# aborts the replacement if the peer's storage is degraded.
8210 for idx, dev in enumerate(self.instance.disks):
8211 if idx not in self.disks:
8214 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8217 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8219 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8220 " replace disks for instance %s" %
8221 (node_name, self.instance.name))
8223 def _CreateNewStorage(self, node_name):
# Creates fresh data+meta LV pairs on `node_name` for every disk being
# replaced and returns the iv_names mapping used by later steps
# (iv_name -> (drbd dev, old LVs, new LVs)); the dict initialization and
# the return statement are among the lines missing from this listing.
8224 vgname = self.cfg.GetVGName()
8227 for idx, dev in enumerate(self.instance.disks):
8228 if idx not in self.disks:
8231 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8233 self.cfg.SetDiskID(dev, node_name)
8235 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8236 names = _GenerateUniqueNames(self.lu, lv_names)
# DRBD8 layout: one data LV sized like the disk, one 128MB metadata LV.
8238 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8239 logical_id=(vgname, names[0]))
8240 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8241 logical_id=(vgname, names[1]))
8243 new_lvs = [lv_data, lv_meta]
8244 old_lvs = dev.children
8245 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8247 # we pass force_create=True to force the LVM creation
8248 for new_lv in new_lvs:
8249 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8250 _GetInstanceInfoText(self.instance), False)
8254 def _CheckDevices(self, node_name, iv_names):
# Verifies every replaced DRBD device is findable and not degraded on
# `node_name`; raises OpExecError otherwise.
8255 for name, (dev, _, _) in iv_names.iteritems():
8256 self.cfg.SetDiskID(dev, node_name)
8258 result = self.rpc.call_blockdev_find(node_name, dev)
8260 msg = result.fail_msg
8261 if msg or not result.payload:
8263 msg = "disk not found"
8264 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8267 if result.payload.is_degraded:
8268 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8270 def _RemoveOldStorage(self, node_name, iv_names):
# Best-effort removal of the replaced (old) LVs on `node_name`; failures
# only produce a warning with a manual-cleanup hint, never an abort.
8271 for name, (_, old_lvs, _) in iv_names.iteritems():
8272 self.lu.LogInfo("Remove logical volumes for %s" % name)
# Per-LV loop (its `for lv in old_lvs:` header is missing here).
8275 self.cfg.SetDiskID(lv, node_name)
8277 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8279 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8280 hint="remove unused LVs manually")
8282 def _ReleaseNodeLock(self, node_name):
8283 """Releases the lock for a given node."""
# Delegates to the LU's lock manager; used for early-release mode.
8284 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8286 def _ExecDrbd8DiskOnly(self, feedback_fn):
8287 """Replace a disk on the primary or secondary for DRBD 8.
8289 The algorithm for replace is quite complicated:
8291 1. for each disk to be replaced:
8293 1. create new LVs on the target node with unique names
8294 1. detach old LVs from the drbd device
8295 1. rename old LVs to name_replaced.<time_t>
8296 1. rename new LVs to old LVs
8297 1. attach the new LVs (with the old names now) to the drbd device
8299 1. wait for sync across all devices
8301 1. for each modified disk:
8303 1. remove old LVs (which have the name name_replaces.<time_t>)
8305 Failures are not very well handled.
# NOTE(review): several lines are missing from this listing (embedded
# numbering has gaps), including the steps_total/cstep initializations
# used below -- consult the full file before editing.
8310 # Step: check device activation
8311 self.lu.LogStep(1, steps_total, "Check device existence")
8312 self._CheckDisksExistence([self.other_node, self.target_node])
8313 self._CheckVolumeGroup([self.target_node, self.other_node])
8315 # Step: check other node consistency
8316 self.lu.LogStep(2, steps_total, "Check peer consistency")
8317 self._CheckDisksConsistency(self.other_node,
8318 self.other_node == self.instance.primary_node,
8321 # Step: create new storage
8322 self.lu.LogStep(3, steps_total, "Allocate new storage")
8323 iv_names = self._CreateNewStorage(self.target_node)
8325 # Step: for each lv, detach+rename*2+attach
8326 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8327 for dev, old_lvs, new_lvs in iv_names.itervalues():
8328 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8330 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8332 result.Raise("Can't detach drbd from local storage on node"
8333 " %s for device %s" % (self.target_node, dev.iv_name))
8335 #cfg.Update(instance)
8337 # ok, we created the new LVs, so now we know we have the needed
8338 # storage; as such, we proceed on the target node to rename
8339 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8340 # using the assumption that logical_id == physical_id (which in
8341 # turn is the unique_id on that node)
8343 # FIXME(iustin): use a better name for the replaced LVs
8344 temp_suffix = int(time.time())
8345 ren_fn = lambda d, suff: (d.physical_id[0],
8346 d.physical_id[1] + "_replaced-%s" % suff)
8348 # Build the rename list based on what LVs exist on the node
8349 rename_old_to_new = []
8350 for to_ren in old_lvs:
8351 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8352 if not result.fail_msg and result.payload:
8354 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8356 self.lu.LogInfo("Renaming the old LVs on the target node")
8357 result = self.rpc.call_blockdev_rename(self.target_node,
8359 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8361 # Now we rename the new LVs to the old LVs
8362 self.lu.LogInfo("Renaming the new LVs on the target node")
8363 rename_new_to_old = [(new, old.physical_id)
8364 for old, new in zip(old_lvs, new_lvs)]
8365 result = self.rpc.call_blockdev_rename(self.target_node,
8367 result.Raise("Can't rename new LVs on node %s" % self.target_node)
# Keep the in-memory disk objects in sync with the on-node renames.
8369 for old, new in zip(old_lvs, new_lvs):
8370 new.logical_id = old.logical_id
8371 self.cfg.SetDiskID(new, self.target_node)
8373 for disk in old_lvs:
8374 disk.logical_id = ren_fn(disk, temp_suffix)
8375 self.cfg.SetDiskID(disk, self.target_node)
8377 # Now that the new lvs have the old name, we can add them to the device
8378 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8379 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
# On attach failure: best-effort removal of the new LVs, then abort.
8381 msg = result.fail_msg
8383 for new_lv in new_lvs:
8384 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8387 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8388 hint=("cleanup manually the unused logical"
8390 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8392 dev.children = new_lvs
8394 self.cfg.Update(self.instance, feedback_fn)
# Early-release mode: drop old storage and both node locks before the
# (potentially long) resync; only WaitForSync RPCs are safe afterwards.
8397 if self.early_release:
8398 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8400 self._RemoveOldStorage(self.target_node, iv_names)
8401 # WARNING: we release both node locks here, do not do other RPCs
8402 # than WaitForSync to the primary node
8403 self._ReleaseNodeLock([self.target_node, self.other_node])
8406 # This can fail as the old devices are degraded and _WaitForSync
8407 # does a combined result over all disks, so we don't check its return value
8408 self.lu.LogStep(cstep, steps_total, "Sync devices")
8410 _WaitForSync(self.lu, self.instance)
8412 # Check all devices manually
8413 self._CheckDevices(self.instance.primary_node, iv_names)
8415 # Step: remove old storage
8416 if not self.early_release:
8417 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8419 self._RemoveOldStorage(self.target_node, iv_names)
# NOTE(review): this chunk is an extraction with original line numbers embedded
# and with gaps (e.g. nothing between 8479 and 8482); code lines below are kept
# byte-identical, only comments were added.
8421 def _ExecDrbd8Secondary(self, feedback_fn):
8422 """Replace the secondary node for DRBD 8.
8424 The algorithm for replace is quite complicated:
8425 - for all disks of the instance:
8426 - create new LVs on the new node with same names
8427 - shutdown the drbd device on the old secondary
8428 - disconnect the drbd network on the primary
8429 - create the drbd device on the new secondary
8430 - network attach the drbd on the primary, using an artifice:
8431 the drbd code for Attach() will connect to the network if it
8432 finds a device which is connected to the good local disks but
8434 - wait for sync across all devices
8435 - remove all disks from the old secondary
8437 Failures are not very well handled.
# Step 1: make sure the instance's devices exist and the VG is present
# on the primary node before touching anything.
8442 # Step: check device activation
8443 self.lu.LogStep(1, steps_total, "Check device existence")
8444 self._CheckDisksExistence([self.instance.primary_node])
8445 self._CheckVolumeGroup([self.instance.primary_node])
8447 # Step: check other node consistency
8448 self.lu.LogStep(2, steps_total, "Check peer consistency")
8449 self._CheckDisksConsistency(self.instance.primary_node, True, True)
# Step 3: allocate the replacement LVs on the new secondary node.
8451 # Step: create new storage
8452 self.lu.LogStep(3, steps_total, "Allocate new storage")
8453 for idx, dev in enumerate(self.instance.disks):
8454 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8455 (self.new_node, idx))
8456 # we pass force_create=True to force LVM creation
8457 for new_lv in dev.children:
8458 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8459 _GetInstanceInfoText(self.instance), False)
8461 # Step 4: dbrd minors and drbd setups changes
8462 # after this, we must manually remove the drbd minors on both the
8463 # error and the success paths
8464 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8465 minors = self.cfg.AllocateDRBDMinor([self.new_node
8466 for dev in self.instance.disks],
8468 logging.debug("Allocated minors %r", minors)
8471 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8472 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8473 (self.new_node, idx))
8474 # create new devices on new_node; note that we create two IDs:
8475 # one without port, so the drbd will be activated without
8476 # networking information on the new node at this stage, and one
8477 # with network, for the latter activation in step 4
8478 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8479 if self.instance.primary_node == o_node1:
# NOTE(review): lines 8480-8481 are missing here; presumably they select
# p_minor from o_minor1/o_minor2 depending on which end is the primary.
8482 assert self.instance.primary_node == o_node2, "Three-node instance?"
8485 new_alone_id = (self.instance.primary_node, self.new_node, None,
8486 p_minor, new_minor, o_secret)
8487 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8488 p_minor, new_minor, o_secret)
8490 iv_names[idx] = (dev, dev.children, new_net_id)
8491 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
# Build a temporary Disk object with the port-less logical id so the
# device can be brought up on the new node without networking.
8493 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8494 logical_id=new_alone_id,
8495 children=dev.children,
8498 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8499 _GetInstanceInfoText(self.instance), False)
8500 except errors.GenericError:
# On failure give back the just-allocated minors (see the warning at
# step 4: minors must be cleaned up on both error and success paths).
8501 self.cfg.ReleaseDRBDMinors(self.instance.name)
8504 # We have new devices, shutdown the drbd on the old secondary
8505 for idx, dev in enumerate(self.instance.disks):
8506 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8507 self.cfg.SetDiskID(dev, self.target_node)
8508 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
# Shutdown failure is only warned about, not fatal: the old secondary
# is being replaced anyway.
8510 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8511 "node: %s" % (idx, msg),
8512 hint=("Please cleanup this device manually as"
8513 " soon as possible"))
8515 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8516 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8517 self.node_secondary_ip,
8518 self.instance.disks)\
8519 [self.instance.primary_node]
8521 msg = result.fail_msg
8523 # detaches didn't succeed (unlikely)
8524 self.cfg.ReleaseDRBDMinors(self.instance.name)
8525 raise errors.OpExecError("Can't detach the disks from the network on"
8526 " old node: %s" % (msg,))
8528 # if we managed to detach at least one, we update all the disks of
8529 # the instance to point to the new secondary
8530 self.lu.LogInfo("Updating instance configuration")
8531 for dev, _, new_logical_id in iv_names.itervalues():
8532 dev.logical_id = new_logical_id
8533 self.cfg.SetDiskID(dev, self.instance.primary_node)
# Persist the new logical ids before the attach, so a crash here leaves
# the config pointing at the new secondary.
8535 self.cfg.Update(self.instance, feedback_fn)
8537 # and now perform the drbd attach
8538 self.lu.LogInfo("Attaching primary drbds to new secondary"
8539 " (standalone => connected)")
8540 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8542 self.node_secondary_ip,
8543 self.instance.disks,
8546 for to_node, to_result in result.items():
8547 msg = to_result.fail_msg
# Attach failures are non-fatal; the admin is pointed at
# "gnt-instance info" for diagnosis.
8549 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8551 hint=("please do a gnt-instance info to see the"
8552 " status of disks"))
# With --early-release, drop the old storage and the node locks before
# waiting for sync (trades safety for lock availability).
8554 if self.early_release:
8555 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8557 self._RemoveOldStorage(self.target_node, iv_names)
8558 # WARNING: we release all node locks here, do not do other RPCs
8559 # than WaitForSync to the primary node
8560 self._ReleaseNodeLock([self.instance.primary_node,
8565 # This can fail as the old devices are degraded and _WaitForSync
8566 # does a combined result over all disks, so we don't check its return value
8567 self.lu.LogStep(cstep, steps_total, "Sync devices")
8569 _WaitForSync(self.lu, self.instance)
8571 # Check all devices manually
8572 self._CheckDevices(self.instance.primary_node, iv_names)
8574 # Step: remove old storage
8575 if not self.early_release:
8576 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8577 self._RemoveOldStorage(self.target_node, iv_names)
# NOTE(review): extraction gaps (e.g. between 8588 and 8592) — code lines
# kept byte-identical, only comments added.
8580 class LURepairNodeStorage(NoHooksLU):
8581 """Repairs the volume group on a node.
# Opcode parameters: the storage type/name to repair and whether faulty
# disks on other nodes of affected instances should only warn.
8586 ("storage_type", ht.NoDefault, _CheckStorageType),
8587 ("name", ht.NoDefault, ht.TNonEmptyString),
8588 ("ignore_consistency", False, ht.TBool),
8592 def CheckArguments(self):
8593 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8595 storage_type = self.op.storage_type
# Only storage types that support the "fix consistency" operation can
# be repaired at all.
8597 if (constants.SO_FIX_CONSISTENCY not in
8598 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8599 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8600 " repaired" % storage_type,
8603 def ExpandNames(self):
# Only the target node needs to be locked.
8604 self.needed_locks = {
8605 locking.LEVEL_NODE: [self.op.node_name],
8608 def _CheckFaultyDisks(self, instance, node_name):
8609 """Ensure faulty disks abort the opcode or at least warn."""
8611 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8613 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8614 " node '%s'" % (instance.name, node_name),
# With ignore_consistency the prereq error is downgraded to a warning.
8616 except errors.OpPrereqError, err:
8617 if self.op.ignore_consistency:
8618 self.proc.LogWarning(str(err.args[0]))
8622 def CheckPrereq(self):
8623 """Check prerequisites.
8626 # Check whether any instance on this node has faulty disks
8627 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
# Stopped instances are skipped (presumably safe to repair under them).
8628 if not inst.admin_up:
8630 check_nodes = set(inst.all_nodes)
8631 check_nodes.discard(self.op.node_name)
8632 for inst_node_name in check_nodes:
8633 self._CheckFaultyDisks(inst, inst_node_name)
8635 def Exec(self, feedback_fn):
# Run the fix-consistency storage operation on the node via RPC and
# convert any RPC failure into an OpExecError.
8636 feedback_fn("Repairing storage unit '%s' on %s ..." %
8637 (self.op.name, self.op.node_name))
8639 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8640 result = self.rpc.call_storage_execute(self.op.node_name,
8641 self.op.storage_type, st_args,
8643 constants.SO_FIX_CONSISTENCY)
8644 result.Raise("Failed to repair storage unit '%s' on %s" %
8645 (self.op.name, self.op.node_name))
# NOTE(review): extraction gaps present — code lines kept byte-identical,
# only comments added.
8648 class LUNodeEvacuationStrategy(NoHooksLU):
8649 """Computes the node evacuation strategy.
# Opcode parameters: nodes to evacuate plus exactly one of remote_node
# (explicit target) or iallocator (automatic placement).
8653 ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8654 ("remote_node", None, ht.TMaybeString),
8655 ("iallocator", None, ht.TMaybeString),
8659 def CheckArguments(self):
8660 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8662 def ExpandNames(self):
8663 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8664 self.needed_locks = locks = {}
# With an iallocator all nodes are candidates, so lock everything;
# with an explicit target only the sources and the target are needed.
8665 if self.op.remote_node is None:
8666 locks[locking.LEVEL_NODE] = locking.ALL_SET
8668 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8669 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8671 def Exec(self, feedback_fn):
8672 if self.op.remote_node is not None:
# Explicit target: every secondary instance on the evacuated nodes is
# moved to remote_node, unless that is already its primary.
8674 for node in self.op.nodes:
8675 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8678 if i.primary_node == self.op.remote_node:
8679 raise errors.OpPrereqError("Node %s is the primary node of"
8680 " instance %s, cannot use it as"
8682 (self.op.remote_node, i.name),
8684 result.append([i.name, self.op.remote_node])
# Otherwise delegate placement to the iallocator in multi-evacuate mode.
8686 ial = IAllocator(self.cfg, self.rpc,
8687 mode=constants.IALLOCATOR_MODE_MEVAC,
8688 evac_nodes=self.op.nodes)
8689 ial.Run(self.op.iallocator, validate=True)
8691 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
# NOTE(review): extraction gaps present — code lines kept byte-identical,
# only comments added.
8697 class LUGrowDisk(LogicalUnit):
8698 """Grow a disk of an instance.
8702 HTYPE = constants.HTYPE_INSTANCE
# Opcode parameters: disk index, amount to grow by, and whether to wait
# for the resync to finish before returning.
8705 ("disk", ht.NoDefault, ht.TInt),
8706 ("amount", ht.NoDefault, ht.TInt),
8707 ("wait_for_sync", True, ht.TBool),
8711 def ExpandNames(self):
8712 self._ExpandAndLockInstance()
# Node locks are computed later from the locked instance.
8713 self.needed_locks[locking.LEVEL_NODE] = []
8714 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8716 def DeclareLocks(self, level):
8717 if level == locking.LEVEL_NODE:
8718 self._LockInstancesNodes()
8720 def BuildHooksEnv(self):
8723 This runs on the master, the primary and all the secondaries.
8727 "DISK": self.op.disk,
8728 "AMOUNT": self.op.amount,
8730 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8731 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8734 def CheckPrereq(self):
8735 """Check prerequisites.
8737 This checks that the instance is in the cluster.
8740 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8741 assert instance is not None, \
8742 "Cannot retrieve locked instance %s" % self.op.instance_name
8743 nodenames = list(instance.all_nodes)
8744 for node in nodenames:
8745 _CheckNodeOnline(self, node)
8747 self.instance = instance
8749 if instance.disk_template not in constants.DTS_GROWABLE:
8750 raise errors.OpPrereqError("Instance's disk layout does not support"
8751 " growing.", errors.ECODE_INVAL)
8753 self.disk = instance.FindDisk(self.op.disk)
# File-based storage has no free-space check yet (see TODO below).
8755 if instance.disk_template != constants.DT_FILE:
8756 # TODO: check the free disk space for file, when that feature will be
8758 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8760 def Exec(self, feedback_fn):
8761 """Execute disk grow.
8764 instance = self.instance
# The disk must be assembled (active) before it can be grown.
8767 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8769 raise errors.OpExecError("Cannot activate block device to grow")
8771 for node in instance.all_nodes:
8772 self.cfg.SetDiskID(disk, node)
8773 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8774 result.Raise("Grow request failed to node %s" % node)
8776 # TODO: Rewrite code to work properly
8777 # DRBD goes into sync mode for a short amount of time after executing the
8778 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8779 # calling "resize" in sync mode fails. Sleeping for a short amount of
8780 # time is a work-around.
# Record the new size in the configuration only after all nodes grew.
8783 disk.RecordGrow(self.op.amount)
8784 self.cfg.Update(instance, feedback_fn)
8785 if self.op.wait_for_sync:
8786 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8788 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8789 " status.\nPlease check the instance.")
# For a stopped instance the disk was only activated for the grow, so
# shut it down again once the sync finished.
8790 if not instance.admin_up:
8791 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8792 elif not instance.admin_up:
8793 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8794 " not supposed to be running because no wait for"
8795 " sync mode was requested.")
# NOTE(review): extraction gaps present — code lines kept byte-identical,
# only comments added.
8798 class LUQueryInstanceData(NoHooksLU):
8799 """Query runtime instance data.
# Opcode parameters: optional instance list (empty = all) and a "static"
# flag that skips all RPCs and returns config-only data.
8803 ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8804 ("static", False, ht.TBool),
8808 def ExpandNames(self):
8809 self.needed_locks = {}
# Read-only query: take all locks in shared mode.
8810 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8812 if self.op.instances:
8813 self.wanted_names = []
8814 for name in self.op.instances:
8815 full_name = _ExpandInstanceName(self.cfg, name)
8816 self.wanted_names.append(full_name)
8817 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
# No explicit list: query every instance (names resolved in CheckPrereq).
8819 self.wanted_names = None
8820 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8822 self.needed_locks[locking.LEVEL_NODE] = []
8823 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8825 def DeclareLocks(self, level):
8826 if level == locking.LEVEL_NODE:
8827 self._LockInstancesNodes()
8829 def CheckPrereq(self):
8830 """Check prerequisites.
8832 This only checks the optional instance list against the existing names.
8835 if self.wanted_names is None:
8836 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8838 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8839 in self.wanted_names]
8841 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8842 """Returns the status of a block device
# Static queries (or a missing node) skip the RPC entirely.
8845 if self.op.static or not node:
8848 self.cfg.SetDiskID(dev, node)
8850 result = self.rpc.call_blockdev_find(node, dev)
8854 result.Raise("Can't compute disk status for %s" % instance_name)
8856 status = result.payload
# Flatten the BlockDevStatus payload into a plain tuple for the caller.
8860 return (status.dev_path, status.major, status.minor,
8861 status.sync_percent, status.estimated_time,
8862 status.is_degraded, status.ldisk_status)
8864 def _ComputeDiskStatus(self, instance, snode, dev):
8865 """Compute block device status.
8868 if dev.dev_type in constants.LDS_DRBD:
8869 # we change the snode then (otherwise we use the one passed in)
8870 if dev.logical_id[0] == instance.primary_node:
8871 snode = dev.logical_id[1]
8873 snode = dev.logical_id[0]
8875 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8877 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
# Recurse into child devices (e.g. the LVs under a DRBD device).
8880 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8881 for child in dev.children]
8886 "iv_name": dev.iv_name,
8887 "dev_type": dev.dev_type,
8888 "logical_id": dev.logical_id,
8889 "physical_id": dev.physical_id,
8890 "pstatus": dev_pstatus,
8891 "sstatus": dev_sstatus,
8892 "children": dev_children,
8899 def Exec(self, feedback_fn):
8900 """Gather and return data"""
8903 cluster = self.cfg.GetClusterInfo()
8905 for instance in self.wanted_instances:
# Non-static query: ask the primary node whether the instance runs.
8906 if not self.op.static:
8907 remote_info = self.rpc.call_instance_info(instance.primary_node,
8909 instance.hypervisor)
8910 remote_info.Raise("Error checking node %s" % instance.primary_node)
8911 remote_info = remote_info.payload
8912 if remote_info and "state" in remote_info:
8915 remote_state = "down"
8918 if instance.admin_up:
8921 config_state = "down"
8923 disks = [self._ComputeDiskStatus(instance, None, device)
8924 for device in instance.disks]
# Assemble the per-instance result dict; nics use the hooks tuple format.
8927 "name": instance.name,
8928 "config_state": config_state,
8929 "run_state": remote_state,
8930 "pnode": instance.primary_node,
8931 "snodes": instance.secondary_nodes,
8933 # this happens to be the same format used for hooks
8934 "nics": _NICListToTuple(self, instance.nics),
8935 "disk_template": instance.disk_template,
8937 "hypervisor": instance.hypervisor,
8938 "network_port": instance.network_port,
8939 "hv_instance": instance.hvparams,
8940 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8941 "be_instance": instance.beparams,
8942 "be_actual": cluster.FillBE(instance),
8943 "os_instance": instance.osparams,
8944 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8945 "serial_no": instance.serial_no,
8946 "mtime": instance.mtime,
8947 "ctime": instance.ctime,
8948 "uuid": instance.uuid,
8951 result[instance.name] = idict
8956 class LUSetInstanceParams(LogicalUnit):
8957 """Modifies an instances's parameters.
8960 HPATH = "instance-modify"
8961 HTYPE = constants.HTYPE_INSTANCE
8964 ("nics", ht.EmptyList, ht.TList),
8965 ("disks", ht.EmptyList, ht.TList),
8966 ("beparams", ht.EmptyDict, ht.TDict),
8967 ("hvparams", ht.EmptyDict, ht.TDict),
8968 ("disk_template", None, ht.TMaybeString),
8969 ("remote_node", None, ht.TMaybeString),
8970 ("os_name", None, ht.TMaybeString),
8971 ("force_variant", False, ht.TBool),
8972 ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
# NOTE(review): extraction gaps present (e.g. 8990-8991, 9005, 9077 missing) —
# code lines kept byte-identical, only comments added.
8977 def CheckArguments(self):
# At least one kind of change must be requested.
8978 if not (self.op.nics or self.op.disks or self.op.disk_template or
8979 self.op.hvparams or self.op.beparams or self.op.os_name):
8980 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8982 if self.op.hvparams:
8983 _CheckGlobalHvParams(self.op.hvparams)
# Validate the disk modification list: each entry is (op, dict) where op
# is DDM_ADD, DDM_REMOVE or an integer disk index.
8987 for disk_op, disk_dict in self.op.disks:
8988 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8989 if disk_op == constants.DDM_REMOVE:
8992 elif disk_op == constants.DDM_ADD:
8995 if not isinstance(disk_op, int):
8996 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8997 if not isinstance(disk_dict, dict):
8998 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8999 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9001 if disk_op == constants.DDM_ADD:
# New disks default to read-write mode and must carry a valid size.
9002 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9003 if mode not in constants.DISK_ACCESS_SET:
9004 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9006 size = disk_dict.get('size', None)
9008 raise errors.OpPrereqError("Required disk parameter size missing",
9012 except (TypeError, ValueError), err:
9013 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9014 str(err), errors.ECODE_INVAL)
9015 disk_dict['size'] = size
9017 # modification of disk
9018 if 'size' in disk_dict:
9019 raise errors.OpPrereqError("Disk size change not possible, use"
9020 " grow-disk", errors.ECODE_INVAL)
9022 if disk_addremove > 1:
9023 raise errors.OpPrereqError("Only one disk add or remove operation"
9024 " supported at a time", errors.ECODE_INVAL)
9026 if self.op.disks and self.op.disk_template is not None:
9027 raise errors.OpPrereqError("Disk template conversion and other disk"
9028 " changes not supported at the same time",
9031 if self.op.disk_template:
9032 _CheckDiskTemplate(self.op.disk_template)
# Converting to a mirrored template needs an explicit secondary node.
9033 if (self.op.disk_template in constants.DTS_NET_MIRROR and
9034 self.op.remote_node is None):
9035 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9036 " one requires specifying a secondary node",
# Validate the NIC modification list; same (op, dict) structure as disks.
9041 for nic_op, nic_dict in self.op.nics:
9042 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9043 if nic_op == constants.DDM_REMOVE:
9046 elif nic_op == constants.DDM_ADD:
9049 if not isinstance(nic_op, int):
9050 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9051 if not isinstance(nic_dict, dict):
9052 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9053 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9055 # nic_dict should be a dict
# The literal string "none" clears the IP; anything else must parse.
9056 nic_ip = nic_dict.get('ip', None)
9057 if nic_ip is not None:
9058 if nic_ip.lower() == constants.VALUE_NONE:
9059 nic_dict['ip'] = None
9061 if not netutils.IPAddress.IsValid(nic_ip):
9062 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
# 'bridge' is the legacy spelling of 'link'; the two are exclusive.
9065 nic_bridge = nic_dict.get('bridge', None)
9066 nic_link = nic_dict.get('link', None)
9067 if nic_bridge and nic_link:
9068 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9069 " at the same time", errors.ECODE_INVAL)
9070 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9071 nic_dict['bridge'] = None
9072 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9073 nic_dict['link'] = None
9075 if nic_op == constants.DDM_ADD:
# Added NICs without an explicit MAC get an auto-generated one.
9076 nic_mac = nic_dict.get('mac', None)
9078 nic_dict['mac'] = constants.VALUE_AUTO
9080 if 'mac' in nic_dict:
9081 nic_mac = nic_dict['mac']
9082 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9083 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9085 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9086 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9087 " modifying an existing nic",
9090 if nic_addremove > 1:
9091 raise errors.OpPrereqError("Only one NIC add or remove operation"
9092 " supported at a time", errors.ECODE_INVAL)
# Lock the instance now; node locks are filled in by DeclareLocks.
9094 def ExpandNames(self):
9095 self._ExpandAndLockInstance()
9096 self.needed_locks[locking.LEVEL_NODE] = []
9097 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# Add the instance's nodes and, for a disk template conversion, the new
# secondary node to the node-level lock set.
9099 def DeclareLocks(self, level):
9100 if level == locking.LEVEL_NODE:
9101 self._LockInstancesNodes()
9102 if self.op.disk_template and self.op.remote_node:
9103 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9104 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
# NOTE(review): extraction gaps present — code lines kept byte-identical,
# only comments added.
9106 def BuildHooksEnv(self):
9109 This runs on the master, primary and secondaries.
# Override memory/vcpus in the hooks env only if they are being changed.
9113 if constants.BE_MEMORY in self.be_new:
9114 args['memory'] = self.be_new[constants.BE_MEMORY]
9115 if constants.BE_VCPUS in self.be_new:
9116 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9117 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9118 # information at all.
# Rebuild the NIC list as it will look after the modification: existing
# NICs with per-index overrides applied, then any added NIC appended.
9121 nic_override = dict(self.op.nics)
9122 for idx, nic in enumerate(self.instance.nics):
9123 if idx in nic_override:
9124 this_nic_override = nic_override[idx]
9126 this_nic_override = {}
9127 if 'ip' in this_nic_override:
9128 ip = this_nic_override['ip']
9131 if 'mac' in this_nic_override:
9132 mac = this_nic_override['mac']
9135 if idx in self.nic_pnew:
9136 nicparams = self.nic_pnew[idx]
9138 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9139 mode = nicparams[constants.NIC_MODE]
9140 link = nicparams[constants.NIC_LINK]
9141 args['nics'].append((ip, mac, mode, link))
9142 if constants.DDM_ADD in nic_override:
9143 ip = nic_override[constants.DDM_ADD].get('ip', None)
9144 mac = nic_override[constants.DDM_ADD]['mac']
9145 nicparams = self.nic_pnew[constants.DDM_ADD]
9146 mode = nicparams[constants.NIC_MODE]
9147 link = nicparams[constants.NIC_LINK]
9148 args['nics'].append((ip, mac, mode, link))
9149 elif constants.DDM_REMOVE in nic_override:
# A remove always drops the last NIC (only one add/remove is allowed
# per opcode, enforced in CheckArguments).
9150 del args['nics'][-1]
9152 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9153 if self.op.disk_template:
9154 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9155 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# NOTE(review): extraction gaps present (e.g. 9162-9163, 9235-9239 missing) —
# code lines kept byte-identical, only comments added.
9158 def CheckPrereq(self):
9159 """Check prerequisites.
9161 This only checks the instance list against the existing names.
9164 # checking the new params on the primary/secondary nodes
9166 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9167 cluster = self.cluster = self.cfg.GetClusterInfo()
9168 assert self.instance is not None, \
9169 "Cannot retrieve locked instance %s" % self.op.instance_name
9170 pnode = instance.primary_node
9171 nodelist = list(instance.all_nodes)
# OS change: verify the target OS exists on the primary unless forced.
9174 if self.op.os_name and not self.op.force:
9175 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9176 self.op.force_variant)
9177 instance_os = self.op.os_name
9179 instance_os = instance.os
# Disk template conversion checks: must be a different, supported
# conversion, with the instance down; mirrored targets also need a
# valid, online, undrained secondary with enough free disk.
9181 if self.op.disk_template:
9182 if instance.disk_template == self.op.disk_template:
9183 raise errors.OpPrereqError("Instance already has disk template %s" %
9184 instance.disk_template, errors.ECODE_INVAL)
9186 if (instance.disk_template,
9187 self.op.disk_template) not in self._DISK_CONVERSIONS:
9188 raise errors.OpPrereqError("Unsupported disk template conversion from"
9189 " %s to %s" % (instance.disk_template,
9190 self.op.disk_template),
9192 _CheckInstanceDown(self, instance, "cannot change disk template")
9193 if self.op.disk_template in constants.DTS_NET_MIRROR:
9194 if self.op.remote_node == pnode:
9195 raise errors.OpPrereqError("Given new secondary node %s is the same"
9196 " as the primary node of the instance" %
9197 self.op.remote_node, errors.ECODE_STATE)
9198 _CheckNodeOnline(self, self.op.remote_node)
9199 _CheckNodeNotDrained(self, self.op.remote_node)
9200 disks = [{"size": d.size} for d in instance.disks]
9201 required = _ComputeDiskSize(self.op.disk_template, disks)
9202 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
9204 # hvparams processing
9205 if self.op.hvparams:
9206 hv_type = instance.hypervisor
9207 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9208 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9209 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9212 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9213 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9214 self.hv_new = hv_new # the new actual values
9215 self.hv_inst = i_hvdict # the new dict (without defaults)
9217 self.hv_new = self.hv_inst = {}
9219 # beparams processing
9220 if self.op.beparams:
9221 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9223 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9224 be_new = cluster.SimpleFillBE(i_bedict)
9225 self.be_new = be_new # the new actual values
9226 self.be_inst = i_bedict # the new dict (without defaults)
9228 self.be_new = self.be_inst = {}
9230 # osparams processing
9231 if self.op.osparams:
9232 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9233 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9234 self.os_inst = i_osdict # the new dict (without defaults)
# Memory change: best-effort check that the new memory fits on the
# primary (and, with auto_balance, on the secondaries); failures to
# query a node are warnings, not errors.
9240 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9241 mem_check_list = [pnode]
9242 if be_new[constants.BE_AUTO_BALANCE]:
9243 # either we changed auto_balance to yes or it was from before
9244 mem_check_list.extend(instance.secondary_nodes)
9245 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9246 instance.hypervisor)
9247 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
9248 instance.hypervisor)
9249 pninfo = nodeinfo[pnode]
9250 msg = pninfo.fail_msg
9252 # Assume the primary node is unreachable and go ahead
9253 self.warn.append("Can't get info from primary node %s: %s" %
9255 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9256 self.warn.append("Node data from primary node %s doesn't contain"
9257 " free memory information" % pnode)
9258 elif instance_info.fail_msg:
9259 self.warn.append("Can't get instance runtime information: %s" %
9260 instance_info.fail_msg)
9262 if instance_info.payload:
9263 current_mem = int(instance_info.payload['memory'])
9265 # Assume instance not running
9266 # (there is a slight race condition here, but it's not very probable,
9267 # and we have no other way to check)
# miss_mem > 0 means the grow cannot be satisfied on the primary.
9269 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9270 pninfo.payload['memory_free'])
9272 raise errors.OpPrereqError("This change will prevent the instance"
9273 " from starting, due to %d MB of memory"
9274 " missing on its primary node" % miss_mem,
9277 if be_new[constants.BE_AUTO_BALANCE]:
9278 for node, nres in nodeinfo.items():
9279 if node not in instance.secondary_nodes:
9283 self.warn.append("Can't get info from secondary node %s: %s" %
9285 elif not isinstance(nres.payload.get('memory_free', None), int):
9286 self.warn.append("Secondary node %s didn't return free"
9287 " memory information" % node)
9288 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9289 self.warn.append("Not enough memory to failover instance to"
9290 " secondary node %s" % node)
# Per-NIC validation: index bounds, parameter merging/validation, bridge
# existence for bridged mode, IP rules for routed mode, MAC handling.
9295 for nic_op, nic_dict in self.op.nics:
9296 if nic_op == constants.DDM_REMOVE:
9297 if not instance.nics:
9298 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9301 if nic_op != constants.DDM_ADD:
9303 if not instance.nics:
9304 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9305 " no NICs" % nic_op,
9307 if nic_op < 0 or nic_op >= len(instance.nics):
9308 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9310 (nic_op, len(instance.nics) - 1),
9312 old_nic_params = instance.nics[nic_op].nicparams
9313 old_nic_ip = instance.nics[nic_op].ip
9318 update_params_dict = dict([(key, nic_dict[key])
9319 for key in constants.NICS_PARAMETERS
9320 if key in nic_dict])
# Legacy 'bridge' is folded into the 'link' parameter here.
9322 if 'bridge' in nic_dict:
9323 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9325 new_nic_params = _GetUpdatedParams(old_nic_params,
9327 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9328 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9329 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9330 self.nic_pinst[nic_op] = new_nic_params
9331 self.nic_pnew[nic_op] = new_filled_nic_params
9332 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9334 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9335 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9336 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9338 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9340 self.warn.append(msg)
9342 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9343 if new_nic_mode == constants.NIC_MODE_ROUTED:
9344 if 'ip' in nic_dict:
9345 nic_ip = nic_dict['ip']
9349 raise errors.OpPrereqError('Cannot set the nic ip to None'
9350 ' on a routed nic', errors.ECODE_INVAL)
9351 if 'mac' in nic_dict:
9352 nic_mac = nic_dict['mac']
9354 raise errors.OpPrereqError('Cannot set the nic mac to None',
9356 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9357 # otherwise generate the mac
9358 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9360 # or validate/reserve the current one
9362 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9363 except errors.ReservationError:
9364 raise errors.OpPrereqError("MAC address %s already in use"
9365 " in cluster" % nic_mac,
9366 errors.ECODE_NOTUNIQUE)
# Disk change validation: diskless has no disks; can't remove the last
# disk; can't exceed MAX_DISKS; modification index must be in range.
9369 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9370 raise errors.OpPrereqError("Disk operations not supported for"
9371 " diskless instances",
9373 for disk_op, _ in self.op.disks:
9374 if disk_op == constants.DDM_REMOVE:
9375 if len(instance.disks) == 1:
9376 raise errors.OpPrereqError("Cannot remove the last disk of"
9377 " an instance", errors.ECODE_INVAL)
9378 _CheckInstanceDown(self, instance, "cannot remove disks")
# NOTE(review): the limit below counts instance.nics, not instance.disks;
# looks like a bug in the original (can't confirm the missing context).
9380 if (disk_op == constants.DDM_ADD and
9381 len(instance.nics) >= constants.MAX_DISKS):
9382 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9383 " add more" % constants.MAX_DISKS,
9385 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9387 if disk_op < 0 or disk_op >= len(instance.disks):
9388 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9390 (disk_op, len(instance.disks)),
# NOTE(review): extraction gaps present — code lines kept byte-identical,
# only comments added. The "aditional" typo below is inside a user-visible
# feedback string, so it is left untouched here.
9395 def _ConvertPlainToDrbd(self, feedback_fn):
9396 """Converts an instance from plain to drbd.
9399 feedback_fn("Converting template to drbd")
9400 instance = self.instance
9401 pnode = instance.primary_node
9402 snode = self.op.remote_node
9404 # create a fake disk info for _GenerateDiskTemplate
9405 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9406 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9407 instance.name, pnode, [snode],
9408 disk_info, None, None, 0)
9409 info = _GetInstanceInfoText(instance)
9410 feedback_fn("Creating aditional volumes...")
9411 # first, create the missing data and meta devices
9412 for disk in new_disks:
9413 # unfortunately this is... not too nice
# On the primary only the meta device (children[1]) is missing — the
# data LV will be the renamed original; the secondary needs everything.
9414 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9416 for child in disk.children:
9417 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9418 # at this stage, all new LVs have been created, we can rename the
9420 feedback_fn("Renaming original volumes...")
# The original plain LVs take the names expected by the new DRBD layout.
9421 rename_list = [(o, n.children[0].logical_id)
9422 for (o, n) in zip(instance.disks, new_disks)]
9423 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9424 result.Raise("Failed to rename original LVs")
9426 feedback_fn("Initializing DRBD devices...")
9427 # all child devices are in place, we can now create the DRBD devices
9428 for disk in new_disks:
9429 for node in [pnode, snode]:
9430 f_create = node == pnode
9431 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9433 # at this point, the instance has been modified
9434 instance.disk_template = constants.DT_DRBD8
9435 instance.disks = new_disks
9436 self.cfg.Update(instance, feedback_fn)
9438 # disks are created, waiting for sync
# Unlike LUGrowDisk, a failed sync here is fatal: the conversion is
# incomplete and needs manual cleanup.
9439 disk_abort = not _WaitForSync(self, instance)
9441 raise errors.OpExecError("There are some degraded disks for"
9442 " this instance, please cleanup manually")
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    Keeps only the first child (the data LV on the primary node) of each
    DRBD disk, then removes the now-unneeded volumes: the old disks on
    the secondary node and the metadata volumes on the primary node.

    @param feedback_fn: function used to send progress messages back to
        the job submitter

    """
    instance = self.instance
    # DRBD8 instances have exactly one secondary node
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      # removal failures are reported but do not abort the conversion
      self.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      # the second child is the (no longer needed) metadata volume
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      self.LogWarning("Could not remove metadata for disk %d on node %s,"
                      " continuing anyway: %s", idx, pnode, msg)
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    Applies, in order: disk changes (remove/add/mode change, including
    disk template conversion), NIC changes, hvparams, beparams, OS name
    and osparams; every applied change is appended to C{result} as a
    (name, new value) pair.

    @param feedback_fn: function used to send progress messages back to
        the job submitter

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          # block device removal failures are not fatal, only logged
          self.LogWarning("Could not remove disk/%d on node %s: %s,"
                          " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          # file-based disks share the directory of the first disk
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          _CreateBlockDev(self, node, instance, new_disk,
                          f_create, info, f_create)
          except errors.OpExecError, err:
            # creation failures are reported but do not abort the change
            self.LogWarning("Failed to create volume %s (%s) on"
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      # change a given disk
      instance.disks[disk_op].mode = disk_dict['mode']
      result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      # disk template conversion (dispatched via _DISK_CONVERSIONS)
      r_shut = _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Cannot shutdow instance disks, unable to"
                               " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      self._DISK_CONVERSIONS[mode](self, feedback_fn)
      self.cfg.ReleaseDRBDMinors(instance.name)
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
        # modify an existing nic in place
        for key in 'mac', 'ip':
          setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

      # OS change
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)
  # Dispatch table mapping (current disk template, requested disk
  # template) to the bound conversion method; looked up in Exec when
  # self.op.disk_template is set
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  # opcode parameters: nodes to query (empty means all) and locking mode
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),

  def ExpandNames(self):
    # share the node locks: this is a read-only operation
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      # no nodes requested, query all of them
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      # only the explicitly requested nodes otherwise
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        # node could not be contacted; flag it with False
        result[node] = False
        result[node] = rpcresult[node].payload
class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  # opcode parameters: export mode (element of constants.EXPORT_MODES)
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the locked instance exists and its primary node is online,
    and caches the cluster domain secret for use in Exec.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    For remote exports, creates an X509 key/certificate pair on the
    instance's primary node and returns handshake/signature information
    derived from the cluster domain secret.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      # salt used when HMAC-signing the key name returned to the caller
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
      # information returned to the caller: handshake message, the
      # HMAC-signed key name and the signed CA certificate
      "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
      "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
      "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  # opcode parameters; target_node is a node name for local exports and
  # a list of per-disk destination info for remote exports
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
    ("shutdown", True, ht.TBool),
    ("remove_instance", False, ht.TBool),
    ("ignore_remove_failures", False, ht.TBool),
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
    ("destination_x509_ca", None, ht.TMaybeString),

  def CheckArguments(self):
    """Check the arguments.

    Remote exports additionally require an X509 key name and a
    destination X509 CA.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have do lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    # a running instance can only be removed if it is shut down first
    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # local export: validate the destination node
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      # not used by local exports
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      # target_node must carry destination info for every disk
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),

      cds = _GetClusterDomainSecret()

      # Check X509 key name
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",

      # Load and verify CA
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      for idx, disk_data in enumerate(self.op.target_node):
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

      raise errors.ProgrammerError("Unhandled export mode %r" %

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          # stale-export removal failures are only warnings
          self.LogWarning("Could not remove older export for instance %s"
                          " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @return: tuple of (finalization status, per-disk export results);
        dresults contains one boolean per instance disk

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

      # Activate the instance disks if we'exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,

      helper.CreateSnapshots()

        # restart the instance if it was running and is to be kept
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      # collect a description of everything that failed
      failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    if not instance_name:
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    for node in exportlist:
      msg = exportlist[node].fail_msg
      # unreachable nodes only produce a warning, we keep going
      self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
      if instance_name in exportlist[node].payload:
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      # name expansion failed and no export was found anywhere
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    # node and instance tag operations need the corresponding object
    # locked (and its name expanded); cluster tags need no object lock
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves self.target to the object (cluster, node or instance)
    whose tags are operated upon; rejects unknown tag kinds.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  # opcode parameters
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  # opcode parameters: the regular expression to search for
    ("pattern", ht.NoDefault, ht.TNonEmptyString),

  def ExpandNames(self):
    # no locks needed; the search only reads configuration data
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    # search cluster, all instance and all node tags
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  # opcode parameters
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag(s) on the target object.

    """
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  # opcode parameters
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    # every tag to be removed must currently be set on the object
    diff_tags = del_tags - cur_tags
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  # opcode parameters
    ("duration", ht.NoDefault, ht.TFloat),
    ("on_master", True, ht.TBool),
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("repeat", 0, ht.TPositiveInt)

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    Sleeps on the master (via utils.TestDelay) and/or on the requested
    nodes (via the test_delay RPC), raising OpExecError on failure.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
    # with repetitions, log the progress of each iteration
    top_value = self.op.repeat - 1
    for i in range(self.op.repeat):
      self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  # opcode parameters
    ("notify_waitlock", False, ht.TBool),
    ("notify_exec", False, ht.TBool),
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
    ("fail", False, ht.TBool),

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  # NOTE(review): takes cls as first argument — presumably declared as a
  # classmethod; decorator not visible here, confirm in the full source
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        # Send details to client
        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)

      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

      # Wait for client to close
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    # use the error class matching the phase we're in, so failures are
    # reported with the right exception type
      errcls = errors.OpPrereqError
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,

  def CheckArguments(self):
    # count the calls, so ExpandNames/Exec can verify the LU life-cycle
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),

  def Exec(self, feedback_fn):
    """Run the job-queue tests requested by the opcode parameters.

    """
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
      # Report how many test messages have been sent
      self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    # NOTE(review): presumably guarded by the "fail" opcode parameter;
    # guard not visible here
    raise errors.OpExecError("Opcode failure was requested")
10437 class IAllocator(object):
10438 """IAllocator framework.
10440 An IAllocator instance has three sets of attributes:
10441 - cfg that is needed to query the cluster
10442 - input data (all members of the _KEYS class attribute are required)
10443 - four buffer attributes (in|out_data|text), that represent the
10444 input (to the external script) in text and data structure format,
10445 and the output from it, again in two formats
10446 - the result variables from the script (success, info, nodes) for
10450 # pylint: disable-msg=R0902
10451 # lots of instance attributes
10453 "name", "mem_size", "disks", "disk_template",
10454 "os", "tags", "nics", "vcpus", "hypervisor",
10457 "name", "relocate_from",
10463 def __init__(self, cfg, rpc, mode, **kwargs):
10466 # init buffer variables
10467 self.in_text = self.out_text = self.in_data = self.out_data = None
10468 # init all input fields so that pylint is happy
10470 self.mem_size = self.disks = self.disk_template = None
10471 self.os = self.tags = self.nics = self.vcpus = None
10472 self.hypervisor = None
10473 self.relocate_from = None
10475 self.evac_nodes = None
10477 self.required_nodes = None
10478 # init result fields
10479 self.success = self.info = self.result = None
10480 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10481 keyset = self._ALLO_KEYS
10482 fn = self._AddNewInstance
10483 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10484 keyset = self._RELO_KEYS
10485 fn = self._AddRelocateInstance
10486 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10487 keyset = self._EVAC_KEYS
10488 fn = self._AddEvacuateNodes
10490 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10491 " IAllocator" % self.mode)
10493 if key not in keyset:
10494 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10495 " IAllocator" % key)
10496 setattr(self, key, kwargs[key])
10499 if key not in kwargs:
10500 raise errors.ProgrammerError("Missing input parameter '%s' to"
10501 " IAllocator" % key)
10502 self._BuildInputData(fn)
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation; it is
    stored in ``self.in_data``.

    """
    # NOTE(review): ``cfg`` is presumably bound from self.cfg earlier in
    # this method -- confirm against the full file
    cluster_info = cfg.GetClusterInfo()
    # cluster-wide, static information
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
    # pair each instance with its filled-in backend parameters
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
    node_list = cfg.GetNodeList()
    # choose which hypervisor's node data to query, depending on mode
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]
    # gather live node and instance information via RPC
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
    self.rpc.call_all_instances_info(node_list,
                                     cluster_info.enabled_hypervisors)
    # assemble the three main sections of the allocator input
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
    self.in_data = data
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    Maps each node group UUID to a dict of its exported attributes
    (currently only the group name).

    """
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = { "name": gdata.name }
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
    """Compute global node data.

    Combines static (configuration) node attributes with the dynamic
    (live RPC) data and returns a dict keyed by node name.

    """
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
      # live data is only gathered (and required) for usable nodes
      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
        remote_info = nresult.payload
        # all these attributes must be present and integer-valued
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              # adjust free memory by the difference between the
              # configured and the actually-used instance memory
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
              i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
              remote_info['memory_free'] -= max(0, i_mem_diff)
              i_p_up_mem += beinfo[constants.BE_MEMORY]
        # compute memory used by instances
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
        # merge the dynamic values into the static entry
        pnr.update(pnr_dyn)
      node_results[nname] = pnr
    return node_results
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    Returns a dict keyed by instance name, each value describing the
    instance's NICs, disks, resources and placement.

    """
    for iinfo, beinfo in i_list:
      # build the NIC list, with cluster-filled NIC parameters
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
        # for bridged NICs, also export the link as "bridge"
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
      # total disk space, including template overhead
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
      instance_data[iinfo.name] = pir
    return instance_data
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
    # mirrored templates need a secondary node, so two nodes total
    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
      self.required_nodes = 1
    # the mode-specific part of the request
      "disk_template": self.disk_template,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)
    # relocation only makes sense for network-mirrored disk templates
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)
    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)
    # relocation needs exactly one (replacement) node
    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
    # the mode-specific part of the request
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    Builds the mode-specific request part listing the nodes to be
    evacuated.

    """
      "evac_nodes": self.evac_nodes
  def _BuildInputData(self, fn):
    """Build input data structures.

    @param fn: the mode-specific function building the "request" part of
        the input data (selected in the constructor)

    """
    # mode-independent cluster data goes into self.in_data first
    self._ComputeClusterData()
    request["type"] = self.mode
    self.in_data["request"] = request
    # serialized form, as passed to the external allocator script
    self.in_text = serializer.Dump(self.in_data)
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    @param name: the name of the allocator script to run
    @param validate: whether to validate the script's output
    @param call_fn: optional override for the RPC call used to run the
        script (defaults to the iallocator runner on the master node)

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")
    # raw (text) output of the script
    self.out_text = result.payload
    # parse and sanity-check the script output
    self._ValidateResult()
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    @raise errors.OpExecError: if the output cannot be parsed or is
        missing required keys

    """
    rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]

    # all three keys must be present; they are also mirrored onto
    # attributes of the same names
    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
    self.out_data = rdict
10793 class LUTestAllocator(NoHooksLU):
10794 """Run allocator tests.
10796 This LU runs the allocator tests
10800 ("direction", ht.NoDefault,
10801 ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10802 ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
10803 ("name", ht.NoDefault, ht.TNonEmptyString),
10804 ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
10805 ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
10806 ht.TOr(ht.TNone, ht.TNonEmptyString))))),
10807 ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
10808 ("hypervisor", None, ht.TMaybeString),
10809 ("allocator", None, ht.TMaybeString),
10810 ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10811 ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10812 ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10813 ("os", None, ht.TMaybeString),
10814 ("disk_template", None, ht.TMaybeString),
10815 ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      # allocation test: all instance-definition attributes must be set
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # the test instance must not already exist
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # each disk must be a dict with an integer size and a 'r'/'w' mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      # default the hypervisor to the cluster-wide one
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      # relocation test: the instance must exist; remember its
      # secondary nodes as the relocation source
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    # any other mode is invalid
    raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                               self.op.mode, errors.ECODE_INVAL)

    # direction is either "in" (only build the request) or "out"
    # (also run the named allocator)
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
10870 def Exec(self, feedback_fn):
10871 """Run the allocator test.
10874 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10875 ial = IAllocator(self.cfg, self.rpc,
10878 mem_size=self.op.mem_size,
10879 disks=self.op.disks,
10880 disk_template=self.op.disk_template,
10884 vcpus=self.op.vcpus,
10885 hypervisor=self.op.hypervisor,
10887 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10888 ial = IAllocator(self.cfg, self.rpc,
10891 relocate_from=list(self.relocate_from),
10893 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10894 ial = IAllocator(self.cfg, self.rpc,
10896 evac_nodes=self.op.evac_nodes)
10898 raise errors.ProgrammerError("Uncatched mode %s in"
10899 " LUTestAllocator.Exec", self.op.mode)
10901 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10902 result = ial.in_text
10904 ial.Run(self.op.allocator, validate=False)
10905 result = ial.out_text