4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 import ganeti.masterd.instance # pylint: disable-msg=W0611
60 # Modifiable default values; need to define these here before the
64 """Returns an empty list.
71 """Returns an empty dict.
77 #: The without-default default value
81 #: The no-type (value too complex to check it in the type system)
87 """Checks if the given value is not None.
90 return val is not None
94 """Checks if the given value is None.
101 """Checks if the given value is a boolean.
104 return isinstance(val, bool)
108 """Checks if the given value is an integer.
111 return isinstance(val, int)
115 """Checks if the given value is a float.
118 return isinstance(val, float)
122 """Checks if the given value is a string.
125 return isinstance(val, basestring)
129 """Checks if a given value evaluates to a boolean True value.
135 def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list.
139 return lambda val: val in target_list
144 """Checks if the given value is a list.
147 return isinstance(val, list)
151 """Checks if the given value is a dictionary.
154 return isinstance(val, dict)
159 """Combine multiple functions using an AND operation.
163 return compat.all(t(val) for t in args)
168 """Combine multiple functions using an AND operation.
172 return compat.any(t(val) for t in args)
178 #: a non-empty string
179 _TNonEmptyString = _TAnd(_TString, _TTrue)
182 #: a maybe non-empty string
183 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
186 #: a maybe boolean (bool or none)
187 _TMaybeBool = _TOr(_TBool, _TNone)
190 #: a positive integer
191 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
193 #: a strictly positive integer
194 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
197 def _TListOf(my_type):
198 """Checks if a given value is a list with all elements of the same type.
202 lambda lst: compat.all(my_type(v) for v in lst))
205 def _TDictOf(key_type, val_type):
206 """Checks a dict type for the type of its key/values.
210 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
211 and compat.all(val_type(v)
212 for v in my_dict.values())))
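# Editor's sketch (not from the original source): assuming the combinators
# defined above, they compose into complete validators, e.g. a dict of
# non-empty string keys to non-negative integer values:
#
#   _TDictOf(_TNonEmptyString, _TPositiveInt)({"minmem": 0})    # True
#   _TDictOf(_TNonEmptyString, _TPositiveInt)({"": -1})         # False
#   _TOr(_TNone, _TListOf(_TBool))([True, False])               # True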
215 # Common opcode attributes
217 #: output fields for a query operation
218 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
221 #: the shutdown timeout
222 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
225 #: the force parameter
226 _PForce = ("force", False, _TBool)
228 #: a required instance name (for single-instance LUs)
229 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
232 #: a required node name (for single-node LUs)
233 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
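# Editor's sketch: a hypothetical LU would build its _OP_PARAMS list (see
# the LogicalUnit docstring below) by combining the shared tuples above
# with its own (name, default, check) entries, e.g.:
#
#   _OP_PARAMS = [
#     _PInstanceName,
#     _PForce,
#     ("os_type", None, _TMaybeString),   # hypothetical LU-specific entry
#   ]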
237 class LogicalUnit(object):
238 """Logical Unit base class.
240 Subclasses must follow these rules:
241 - implement ExpandNames
242 - implement CheckPrereq (except when tasklets are used)
243 - implement Exec (except when tasklets are used)
244 - implement BuildHooksEnv
245 - redefine HPATH and HTYPE
246 - optionally redefine their run requirements:
247 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
249 Note that all commands require root permissions.
251 @ivar dry_run_result: the value (if any) that will be returned to the caller
252 in dry-run mode (signalled by opcode dry_run parameter)
253 @cvar _OP_PARAMS: a list of opcode attributes, their default values
254 they should get if not already defined, and the types they must match
262 def __init__(self, processor, op, context, rpc):
263 """Constructor for LogicalUnit.
265 This needs to be overridden in derived classes in order to check op
269 self.proc = processor
271 self.cfg = context.cfg
272 self.context = context
274 # Dicts used to declare locking needs to mcpu
275 self.needed_locks = None
276 self.acquired_locks = {}
277 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
279 self.remove_locks = {}
280 # Used to force good behavior when calling helper functions
281 self.recalculate_locks = {}
284 self.Log = processor.Log # pylint: disable-msg=C0103
285 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
286 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
287 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
288 # support for dry-run
289 self.dry_run_result = None
290 # support for generic debug attribute
291 if (not hasattr(self.op, "debug_level") or
292 not isinstance(self.op.debug_level, int)):
293 self.op.debug_level = 0
298 # The new kind-of-type-system
299 op_id = self.op.OP_ID
300 for attr_name, aval, test in self._OP_PARAMS:
301 if not hasattr(op, attr_name):
302 if aval == _NoDefault:
303 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
304 (op_id, attr_name), errors.ECODE_INVAL)
310 setattr(self.op, attr_name, dval)
311 attr_val = getattr(op, attr_name)
315 if not callable(test):
316 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
317 " given type is not a proper type (%s)" %
318 (op_id, attr_name, test))
319 if not test(attr_val):
320 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
321 self.op.OP_ID, attr_name, type(attr_val), attr_val)
322 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
323 (op_id, attr_name), errors.ECODE_INVAL)
325 self.CheckArguments()
328 """Returns the SshRunner object
332 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
335 ssh = property(fget=__GetSSH)
337 def CheckArguments(self):
338 """Check syntactic validity for the opcode arguments.
340 This method is for doing a simple syntactic check and ensuring the
341 validity of opcode parameters, without any cluster-related
342 checks. While the same can be accomplished in ExpandNames and/or
343 CheckPrereq, doing these separately is better because:
345 - ExpandNames is left as purely a lock-related function
346 - CheckPrereq is run after we have acquired locks (and possible
349 The function is allowed to change the self.op attribute so that
350 later methods can no longer worry about missing parameters.
355 def ExpandNames(self):
356 """Expand names for this LU.
358 This method is called before starting to execute the opcode, and it should
359 update all the parameters of the opcode to their canonical form (e.g. a
360 short node name must be fully expanded after this method has successfully
361 completed). This way locking, hooks, logging, etc. can work correctly.
363 LUs which implement this method must also populate the self.needed_locks
364 member, as a dict with lock levels as keys, and a list of needed lock names
367 - use an empty dict if you don't need any lock
368 - if you don't need any lock at a particular level omit that level
369 - don't put anything for the BGL level
370 - if you want all locks at a level use locking.ALL_SET as a value
372 If you need to share locks (rather than acquire them exclusively) at one
373 level you can modify self.share_locks, setting a true value (usually 1) for
374 that level. By default locks are not shared.
376 This function can also define a list of tasklets, which then will be
377 executed in order instead of the usual LU-level CheckPrereq and Exec
378 functions, if those are not defined by the LU.
382 # Acquire all nodes and one instance
383 self.needed_locks = {
384 locking.LEVEL_NODE: locking.ALL_SET,
385 locking.LEVEL_INSTANCE: ['instance1.example.com'],
387 # Acquire just two nodes
388 self.needed_locks = {
389 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
392 self.needed_locks = {} # No, you can't leave it to the default value None
395 # The implementation of this method is mandatory only if the new LU is
396 # concurrent, so that old LUs don't need to be changed all at the same
399 self.needed_locks = {} # Exclusive LUs don't need locks.
401 raise NotImplementedError
403 def DeclareLocks(self, level):
404 """Declare LU locking needs for a level
406 While most LUs can just declare their locking needs at ExpandNames time,
407 sometimes there's the need to calculate some locks after having acquired
408 the ones before. This function is called just before acquiring locks at a
409 particular level, but after acquiring the ones at lower levels, and permits
410 such calculations. It can be used to modify self.needed_locks, and by
411 default it does nothing.
413 This function is only called if you have something already set in
414 self.needed_locks for the level.
416 @param level: Locking level which is going to be locked
417 @type level: member of ganeti.locking.LEVELS
421 def CheckPrereq(self):
422 """Check prerequisites for this LU.
424 This method should check that the prerequisites for the execution
425 of this LU are fulfilled. It can do internode communication, but
426 it should be idempotent - no cluster or system changes are
429 The method should raise errors.OpPrereqError in case something is
430 not fulfilled. Its return value is ignored.
432 This method should also update all the parameters of the opcode to
433 their canonical form if it hasn't been done by ExpandNames before.
436 if self.tasklets is not None:
437 for (idx, tl) in enumerate(self.tasklets):
438 logging.debug("Checking prerequisites for tasklet %s/%s",
439 idx + 1, len(self.tasklets))
444 def Exec(self, feedback_fn):
447 This method should implement the actual work. It should raise
448 errors.OpExecError for failures that are somewhat dealt with in
452 if self.tasklets is not None:
453 for (idx, tl) in enumerate(self.tasklets):
454 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
457 raise NotImplementedError
459 def BuildHooksEnv(self):
460 """Build hooks environment for this LU.
462 This method should return a three-element tuple consisting of: a dict
463 containing the environment that will be used for running the
464 specific hook for this LU, a list of node names on which the hook
465 should run before the execution, and a list of node names on which
466 the hook should run after the execution.
468 The keys of the dict must not have the 'GANETI_' prefix, as this will
469 be handled by the hooks runner. Also note that additional keys will be
470 added by the hooks runner. If the LU doesn't define any
471 environment, an empty dict (and not None) should be returned.
473 If there are no nodes, an empty list (and not None) should be returned.
475 Note that if the HPATH for a LU class is None, this function will
479 raise NotImplementedError
481 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
482 """Notify the LU about the results of its hooks.
484 This method is called every time a hooks phase is executed, and notifies
485 the Logical Unit about the hooks' result. The LU can then use it to alter
486 its result based on the hooks. By default the method does nothing and the
487 previous result is passed back unchanged but any LU can define it if it
488 wants to use the local cluster hook-scripts somehow.
490 @param phase: one of L{constants.HOOKS_PHASE_POST} or
491 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
492 @param hook_results: the results of the multi-node hooks rpc call
493 @param feedback_fn: function used to send feedback back to the caller
494 @param lu_result: the previous Exec result this LU had, or None
496 @return: the new Exec result, based on the previous result
500 # API must be kept, thus we ignore the unused-argument and 'method
501 # could be a function' warnings
502 # pylint: disable-msg=W0613,R0201
505 def _ExpandAndLockInstance(self):
506 """Helper function to expand and lock an instance.
508 Many LUs that work on an instance take its name in self.op.instance_name
509 and need to expand it and then declare the expanded name for locking. This
510 function does it, and then updates self.op.instance_name to the expanded
511 name. It also initializes needed_locks as a dict, if this hasn't been done
515 if self.needed_locks is None:
516 self.needed_locks = {}
518 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
519 "_ExpandAndLockInstance called with instance-level locks set"
520 self.op.instance_name = _ExpandInstanceName(self.cfg,
521 self.op.instance_name)
522 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
524 def _LockInstancesNodes(self, primary_only=False):
525 """Helper function to declare instances' nodes for locking.
527 This function should be called after locking one or more instances to lock
528 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
529 with all primary or secondary nodes for instances already locked and
530 present in self.needed_locks[locking.LEVEL_INSTANCE].
532 It should be called from DeclareLocks, and for safety only works if
533 self.recalculate_locks[locking.LEVEL_NODE] is set.
535 In the future it may grow parameters to just lock some instance's nodes, or
536 to just lock primaries or secondary nodes, if needed.
538 It should be called in DeclareLocks in a way similar to::
540 if level == locking.LEVEL_NODE:
541 self._LockInstancesNodes()
543 @type primary_only: boolean
544 @param primary_only: only lock primary nodes of locked instances
547 assert locking.LEVEL_NODE in self.recalculate_locks, \
548 "_LockInstancesNodes helper function called with no nodes to recalculate"
550 # TODO: check if we've really been called with the instance locks held
552 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
553 # future we might want to have different behaviors depending on the value
554 # of self.recalculate_locks[locking.LEVEL_NODE]
556 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
557 instance = self.context.cfg.GetInstanceInfo(instance_name)
558 wanted_nodes.append(instance.primary_node)
560 wanted_nodes.extend(instance.secondary_nodes)
562 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
563 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
564 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
565 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
567 del self.recalculate_locks[locking.LEVEL_NODE]
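# Editor's sketch (assumed usage, not part of this excerpt): the usual
# ExpandNames/DeclareLocks pairing for an instance-level LU, built from the
# helpers above:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()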
570 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
571 """Simple LU which runs no hooks.
573 This LU is intended as a parent for other LogicalUnits which will
574 run no hooks, in order to reduce duplicate code.
580 def BuildHooksEnv(self):
581 """Empty BuildHooksEnv for NoHooksLu.
583 This just raises an error.
586 assert False, "BuildHooksEnv called for NoHooksLUs"
590 """Tasklet base class.
592 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
593 they can mix legacy code with tasklets. Locking needs to be done in the LU,
594 tasklets know nothing about locks.
596 Subclasses must follow these rules:
597 - Implement CheckPrereq
601 def __init__(self, lu):
608 def CheckPrereq(self):
609 """Check prerequisites for this tasklets.
611 This method should check whether the prerequisites for the execution of
612 this tasklet are fulfilled. It can do internode communication, but it
613 should be idempotent - no cluster or system changes are allowed.
615 The method should raise errors.OpPrereqError in case something is not
616 fulfilled. Its return value is ignored.
618 This method should also update all parameters to their canonical form if it
619 hasn't been done before.
624 def Exec(self, feedback_fn):
625 """Execute the tasklet.
627 This method should implement the actual work. It should raise
628 errors.OpExecError for failures that are somewhat dealt with in code, or
632 raise NotImplementedError
635 def _GetWantedNodes(lu, nodes):
636 """Returns list of checked and expanded node names.
638 @type lu: L{LogicalUnit}
639 @param lu: the logical unit on whose behalf we execute
641 @param nodes: list of node names or None for all nodes
643 @return: the list of nodes, sorted
644 @raise errors.ProgrammerError: if the nodes parameter is wrong type
648 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
649 " non-empty list of nodes whose name is to be expanded.")
651 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
652 return utils.NiceSort(wanted)
655 def _GetWantedInstances(lu, instances):
656 """Returns list of checked and expanded instance names.
658 @type lu: L{LogicalUnit}
659 @param lu: the logical unit on whose behalf we execute
660 @type instances: list
661 @param instances: list of instance names or None for all instances
663 @return: the list of instances, sorted
664 @raise errors.OpPrereqError: if the instances parameter is wrong type
665 @raise errors.OpPrereqError: if any of the passed instances is not found
669 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
671 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
675 def _GetUpdatedParams(old_params, update_dict,
676 use_default=True, use_none=False):
677 """Return the new version of a parameter dictionary.
679 @type old_params: dict
680 @param old_params: old parameters
681 @type update_dict: dict
682 @param update_dict: dict containing new parameter values, or
683 constants.VALUE_DEFAULT to reset the parameter to its default
685 @type use_default: boolean
686 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
687 values as 'to be deleted' values
688 @type use_none: boolean
689 @param use_none: whether to recognise C{None} values as 'to be
692 @return: the new parameter dictionary
695 params_copy = copy.deepcopy(old_params)
696 for key, val in update_dict.iteritems():
697 if ((use_default and val == constants.VALUE_DEFAULT) or
698 (use_none and val is None)):
704 params_copy[key] = val
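# Editor's example (hypothetical values): updating one parameter while
# resetting another to its cluster default:
#
#   _GetUpdatedParams({"memory": 128, "vcpus": 2},
#                     {"memory": constants.VALUE_DEFAULT, "vcpus": 4})
#   => {"vcpus": 4}   # "memory" is removed, so the default applies again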
708 def _CheckOutputFields(static, dynamic, selected):
709 """Checks whether all selected fields are valid.
711 @type static: L{utils.FieldSet}
712 @param static: static fields set
713 @type dynamic: L{utils.FieldSet}
714 @param dynamic: dynamic fields set
721 delta = f.NonMatching(selected)
723 raise errors.OpPrereqError("Unknown output fields selected: %s"
724 % ",".join(delta), errors.ECODE_INVAL)
727 def _CheckGlobalHvParams(params):
728 """Validates that given hypervisor params are not global ones.
730 This will ensure that instances don't get customised versions of
734 used_globals = constants.HVC_GLOBALS.intersection(params)
736 msg = ("The following hypervisor parameters are global and cannot"
737 " be customized at instance level, please modify them at"
738 " cluster level: %s" % utils.CommaJoin(used_globals))
739 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
742 def _CheckNodeOnline(lu, node):
743 """Ensure that a given node is online.
745 @param lu: the LU on behalf of which we make the check
746 @param node: the node to check
747 @raise errors.OpPrereqError: if the node is offline
750 if lu.cfg.GetNodeInfo(node).offline:
751 raise errors.OpPrereqError("Can't use offline node %s" % node,
755 def _CheckNodeNotDrained(lu, node):
756 """Ensure that a given node is not drained.
758 @param lu: the LU on behalf of which we make the check
759 @param node: the node to check
760 @raise errors.OpPrereqError: if the node is drained
763 if lu.cfg.GetNodeInfo(node).drained:
764 raise errors.OpPrereqError("Can't use drained node %s" % node,
768 def _CheckNodeHasOS(lu, node, os_name, force_variant):
769 """Ensure that a node supports a given OS.
771 @param lu: the LU on behalf of which we make the check
772 @param node: the node to check
773 @param os_name: the OS to query about
774 @param force_variant: whether to ignore variant errors
775 @raise errors.OpPrereqError: if the node does not support the OS
778 result = lu.rpc.call_os_get(node, os_name)
779 result.Raise("OS '%s' not in supported OS list for node %s" %
781 prereq=True, ecode=errors.ECODE_INVAL)
782 if not force_variant:
783 _CheckOSVariant(result.payload, os_name)
786 def _RequireFileStorage():
787 """Checks that file storage is enabled.
789 @raise errors.OpPrereqError: when file storage is disabled
792 if not constants.ENABLE_FILE_STORAGE:
793 raise errors.OpPrereqError("File storage disabled at configure time",
797 def _CheckDiskTemplate(template):
798 """Ensure a given disk template is valid.
801 if template not in constants.DISK_TEMPLATES:
802 msg = ("Invalid disk template name '%s', valid templates are: %s" %
803 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
804 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
805 if template == constants.DT_FILE:
806 _RequireFileStorage()
810 def _CheckStorageType(storage_type):
811 """Ensure a given storage type is valid.
814 if storage_type not in constants.VALID_STORAGE_TYPES:
815 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
817 if storage_type == constants.ST_FILE:
818 _RequireFileStorage()
822 def _GetClusterDomainSecret():
823 """Reads the cluster domain secret.
826 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
830 def _CheckInstanceDown(lu, instance, reason):
831 """Ensure that an instance is not running."""
832 if instance.admin_up:
833 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
834 (instance.name, reason), errors.ECODE_STATE)
836 pnode = instance.primary_node
837 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
838 ins_l.Raise("Can't contact node %s for instance information" % pnode,
839 prereq=True, ecode=errors.ECODE_ENVIRON)
841 if instance.name in ins_l.payload:
842 raise errors.OpPrereqError("Instance %s is running, %s" %
843 (instance.name, reason), errors.ECODE_STATE)
846 def _ExpandItemName(fn, name, kind):
847 """Expand an item name.
849 @param fn: the function to use for expansion
850 @param name: requested item name
851 @param kind: text description ('Node' or 'Instance')
852 @return: the resolved (full) name
853 @raise errors.OpPrereqError: if the item is not found
857 if full_name is None:
858 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
863 def _ExpandNodeName(cfg, name):
864 """Wrapper over L{_ExpandItemName} for nodes."""
865 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
868 def _ExpandInstanceName(cfg, name):
869 """Wrapper over L{_ExpandItemName} for instance."""
870 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
873 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
874 memory, vcpus, nics, disk_template, disks,
875 bep, hvp, hypervisor_name):
876 """Builds instance related env variables for hooks
878 This builds the hook environment from individual variables.
881 @param name: the name of the instance
882 @type primary_node: string
883 @param primary_node: the name of the instance's primary node
884 @type secondary_nodes: list
885 @param secondary_nodes: list of secondary nodes as strings
886 @type os_type: string
887 @param os_type: the name of the instance's OS
888 @type status: boolean
889 @param status: the should_run status of the instance
891 @param memory: the memory size of the instance
893 @param vcpus: the count of VCPUs the instance has
895 @param nics: list of tuples (ip, mac, mode, link) representing
896 the NICs the instance has
897 @type disk_template: string
898 @param disk_template: the disk template of the instance
900 @param disks: the list of (size, mode) pairs
902 @param bep: the backend parameters for the instance
904 @param hvp: the hypervisor parameters for the instance
905 @type hypervisor_name: string
906 @param hypervisor_name: the hypervisor for the instance
908 @return: the hook environment for this instance
917 "INSTANCE_NAME": name,
918 "INSTANCE_PRIMARY": primary_node,
919 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
920 "INSTANCE_OS_TYPE": os_type,
921 "INSTANCE_STATUS": str_status,
922 "INSTANCE_MEMORY": memory,
923 "INSTANCE_VCPUS": vcpus,
924 "INSTANCE_DISK_TEMPLATE": disk_template,
925 "INSTANCE_HYPERVISOR": hypervisor_name,
929 nic_count = len(nics)
930 for idx, (ip, mac, mode, link) in enumerate(nics):
933 env["INSTANCE_NIC%d_IP" % idx] = ip
934 env["INSTANCE_NIC%d_MAC" % idx] = mac
935 env["INSTANCE_NIC%d_MODE" % idx] = mode
936 env["INSTANCE_NIC%d_LINK" % idx] = link
937 if mode == constants.NIC_MODE_BRIDGED:
938 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
942 env["INSTANCE_NIC_COUNT"] = nic_count
945 disk_count = len(disks)
946 for idx, (size, mode) in enumerate(disks):
947 env["INSTANCE_DISK%d_SIZE" % idx] = size
948 env["INSTANCE_DISK%d_MODE" % idx] = mode
952 env["INSTANCE_DISK_COUNT"] = disk_count
954 for source, kind in [(bep, "BE"), (hvp, "HV")]:
955 for key, value in source.items():
956 env["INSTANCE_%s_%s" % (kind, key)] = value
961 def _NICListToTuple(lu, nics):
962 """Build a list of nic information tuples.
964 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
965 value in LUQueryInstanceData.
967 @type lu: L{LogicalUnit}
968 @param lu: the logical unit on whose behalf we execute
969 @type nics: list of L{objects.NIC}
970 @param nics: list of nics to convert to hooks tuples
974 cluster = lu.cfg.GetClusterInfo()
978 filled_params = cluster.SimpleFillNIC(nic.nicparams)
979 mode = filled_params[constants.NIC_MODE]
980 link = filled_params[constants.NIC_LINK]
981 hooks_nics.append((ip, mac, mode, link))
985 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
986 """Builds instance related env variables for hooks from an object.
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type instance: L{objects.Instance}
991 @param instance: the instance for which we should build the
994 @param override: dictionary with key/values that will override
997 @return: the hook environment dictionary
1000 cluster = lu.cfg.GetClusterInfo()
1001 bep = cluster.FillBE(instance)
1002 hvp = cluster.FillHV(instance)
1004 'name': instance.name,
1005 'primary_node': instance.primary_node,
1006 'secondary_nodes': instance.secondary_nodes,
1007 'os_type': instance.os,
1008 'status': instance.admin_up,
1009 'memory': bep[constants.BE_MEMORY],
1010 'vcpus': bep[constants.BE_VCPUS],
1011 'nics': _NICListToTuple(lu, instance.nics),
1012 'disk_template': instance.disk_template,
1013 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1016 'hypervisor_name': instance.hypervisor,
1019 args.update(override)
1020 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1023 def _AdjustCandidatePool(lu, exceptions):
1024 """Adjust the candidate pool after node operations.
1027 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1029 lu.LogInfo("Promoted nodes to master candidate role: %s",
1030 utils.CommaJoin(node.name for node in mod_list))
1031 for name in mod_list:
1032 lu.context.ReaddNode(name)
1033 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1035 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1039 def _DecideSelfPromotion(lu, exceptions=None):
1040 """Decide whether I should promote myself as a master candidate.
1043 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1044 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1045 # the new node will increase mc_max by one, so:
1046 mc_should = min(mc_should + 1, cp_size)
1047 return mc_now < mc_should
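# Editor's worked example: with candidate_pool_size = 10, mc_now = 9 and
# mc_should = 9, adding the new node gives mc_should = min(9 + 1, 10) = 10;
# since 9 < 10, the node decides to promote itself.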
1050 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1051 """Check that the brigdes needed by a list of nics exist.
1054 cluster = lu.cfg.GetClusterInfo()
1055 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1056 brlist = [params[constants.NIC_LINK] for params in paramslist
1057 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1059 result = lu.rpc.call_bridges_exist(target_node, brlist)
1060 result.Raise("Error checking bridges on destination node '%s'" %
1061 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1064 def _CheckInstanceBridgesExist(lu, instance, node=None):
1065 """Check that the brigdes needed by an instance exist.
1069 node = instance.primary_node
1070 _CheckNicsBridgesExist(lu, instance.nics, node)
1073 def _CheckOSVariant(os_obj, name):
1074 """Check whether an OS name conforms to the os variants specification.
1076 @type os_obj: L{objects.OS}
1077 @param os_obj: OS object to check
1079 @param name: OS name passed by the user, to check for validity
1082 if not os_obj.supported_variants:
1085 variant = name.split("+", 1)[1]
1087 raise errors.OpPrereqError("OS name must include a variant",
1090 if variant not in os_obj.supported_variants:
1091 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
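# Editor's example: for an OS with supported_variants, a user-supplied name
# such as "debootstrap+default" yields the variant "default", while a bare
# "debootstrap" raises "OS name must include a variant".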
1094 def _GetNodeInstancesInner(cfg, fn):
1095 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1098 def _GetNodeInstances(cfg, node_name):
1099 """Returns a list of all primary and secondary instances on a node.
1103 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1106 def _GetNodePrimaryInstances(cfg, node_name):
1107 """Returns primary instances on a node.
1110 return _GetNodeInstancesInner(cfg,
1111 lambda inst: node_name == inst.primary_node)
1114 def _GetNodeSecondaryInstances(cfg, node_name):
1115 """Returns secondary instances on a node.
1118 return _GetNodeInstancesInner(cfg,
1119 lambda inst: node_name in inst.secondary_nodes)
1122 def _GetStorageTypeArgs(cfg, storage_type):
1123 """Returns the arguments for a storage type.
1126 # Special case for file storage
1127 if storage_type == constants.ST_FILE:
1128 # storage.FileStorage wants a list of storage directories
1129 return [[cfg.GetFileStorageDir()]]
1134 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1137 for dev in instance.disks:
1138 cfg.SetDiskID(dev, node_name)
1140 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1141 result.Raise("Failed to get disk status from node %s" % node_name,
1142 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1144 for idx, bdev_status in enumerate(result.payload):
1145 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1151 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1152 """Check the sanity of iallocator and node arguments and use the
1153 cluster-wide iallocator if appropriate.
1155 Check that at most one of (iallocator, node) is specified. If none is
1156 specified, then the LU's opcode's iallocator slot is filled with the
1157 cluster-wide default iallocator.
1159 @type iallocator_slot: string
1160 @param iallocator_slot: the name of the opcode iallocator slot
1161 @type node_slot: string
1162 @param node_slot: the name of the opcode target node slot
1165 node = getattr(lu.op, node_slot, None)
1166 iallocator = getattr(lu.op, iallocator_slot, None)
1168 if node is not None and iallocator is not None:
1169 raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
1171 elif node is None and iallocator is None:
1172 default_iallocator = lu.cfg.GetDefaultIAllocator()
1173 if default_iallocator:
1174 setattr(lu.op, iallocator_slot, default_iallocator)
1176 raise errors.OpPrereqError("No iallocator or node given and no"
1177 " cluster-wide default iallocator found."
1178 " Please specify either an iallocator or a"
1179 " node, or set a cluster-wide default"
1183 class LUPostInitCluster(LogicalUnit):
1184 """Logical unit for running hooks after cluster initialization.
1187 HPATH = "cluster-init"
1188 HTYPE = constants.HTYPE_CLUSTER
1190 def BuildHooksEnv(self):
1194 env = {"OP_TARGET": self.cfg.GetClusterName()}
1195 mn = self.cfg.GetMasterNode()
1196 return env, [], [mn]
1198 def Exec(self, feedback_fn):
1205 class LUDestroyCluster(LogicalUnit):
1206 """Logical unit for destroying the cluster.
1209 HPATH = "cluster-destroy"
1210 HTYPE = constants.HTYPE_CLUSTER
1212 def BuildHooksEnv(self):
1216 env = {"OP_TARGET": self.cfg.GetClusterName()}
1219 def CheckPrereq(self):
1220 """Check prerequisites.
1222 This checks whether the cluster is empty.
1224 Any errors are signaled by raising errors.OpPrereqError.
1227 master = self.cfg.GetMasterNode()
1229 nodelist = self.cfg.GetNodeList()
1230 if len(nodelist) != 1 or nodelist[0] != master:
1231 raise errors.OpPrereqError("There are still %d node(s) in"
1232 " this cluster." % (len(nodelist) - 1),
1234 instancelist = self.cfg.GetInstanceList()
1236 raise errors.OpPrereqError("There are still %d instance(s) in"
1237 " this cluster." % len(instancelist),
1240 def Exec(self, feedback_fn):
1241 """Destroys the cluster.
1244 master = self.cfg.GetMasterNode()
1245 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1247 # Run post hooks on master node before it's removed
1248 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1250 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1252 # pylint: disable-msg=W0702
1253 self.LogWarning("Errors occurred running hooks on %s" % master)
1255 result = self.rpc.call_node_stop_master(master, False)
1256 result.Raise("Could not disable the master role")
1258 if modify_ssh_setup:
1259 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1260 utils.CreateBackup(priv_key)
1261 utils.CreateBackup(pub_key)
1266 def _VerifyCertificate(filename):
1267 """Verifies a certificate for LUVerifyCluster.
1269 @type filename: string
1270 @param filename: Path to PEM file
1274 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1275 utils.ReadFile(filename))
1276 except Exception, err: # pylint: disable-msg=W0703
1277 return (LUVerifyCluster.ETYPE_ERROR,
1278 "Failed to load X509 certificate %s: %s" % (filename, err))
1281 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1282 constants.SSL_CERT_EXPIRATION_ERROR)
1285 fnamemsg = "While verifying %s: %s" % (filename, msg)
1290 return (None, fnamemsg)
1291 elif errcode == utils.CERT_WARNING:
1292 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1293 elif errcode == utils.CERT_ERROR:
1294 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1296 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1299 class LUVerifyCluster(LogicalUnit):
1300 """Verifies the cluster status.
1303 HPATH = "cluster-verify"
1304 HTYPE = constants.HTYPE_CLUSTER
1306 ("skip_checks", _EmptyList,
1307 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1308 ("verbose", False, _TBool),
1309 ("error_codes", False, _TBool),
1310 ("debug_simulate_errors", False, _TBool),
1314 TCLUSTER = "cluster"
1316 TINSTANCE = "instance"
1318 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1319 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1320 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1321 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1322 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1323 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1325 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1326 ENODEDRBD = (TNODE, "ENODEDRBD")
1327 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1328 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1329 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1330 ENODEHV = (TNODE, "ENODEHV")
1331 ENODELVM = (TNODE, "ENODELVM")
1332 ENODEN1 = (TNODE, "ENODEN1")
1333 ENODENET = (TNODE, "ENODENET")
1334 ENODEOS = (TNODE, "ENODEOS")
1335 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1336 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1337 ENODERPC = (TNODE, "ENODERPC")
1338 ENODESSH = (TNODE, "ENODESSH")
1339 ENODEVERSION = (TNODE, "ENODEVERSION")
1340 ENODESETUP = (TNODE, "ENODESETUP")
1341 ENODETIME = (TNODE, "ENODETIME")
1343 ETYPE_FIELD = "code"
1344 ETYPE_ERROR = "ERROR"
1345 ETYPE_WARNING = "WARNING"
1347 class NodeImage(object):
1348 """A class representing the logical and physical status of a node.
1351 @ivar name: the node name to which this object refers
1352 @ivar volumes: a structure as returned from
1353 L{ganeti.backend.GetVolumeList} (runtime)
1354 @ivar instances: a list of running instances (runtime)
1355 @ivar pinst: list of configured primary instances (config)
1356 @ivar sinst: list of configured secondary instances (config)
1357 @ivar sbp: dictionary of {primary-node: list of instances} of all peers
1358 of this node (config)
1359 @ivar mfree: free memory, as reported by hypervisor (runtime)
1360 @ivar dfree: free disk, as reported by the node (runtime)
1361 @ivar offline: the offline status (config)
1362 @type rpc_fail: boolean
1363 @ivar rpc_fail: whether the RPC verify call failed (overall,
1364 not whether the individual keys were correct) (runtime)
1365 @type lvm_fail: boolean
1366 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1367 @type hyp_fail: boolean
1368 @ivar hyp_fail: whether the RPC call didn't return the instance list
1369 @type ghost: boolean
1370 @ivar ghost: whether this is a known node or not (config)
1371 @type os_fail: boolean
1372 @ivar os_fail: whether the RPC call didn't return valid OS data
1374 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1377 def __init__(self, offline=False, name=None):
1386 self.offline = offline
1387 self.rpc_fail = False
1388 self.lvm_fail = False
1389 self.hyp_fail = False
1391 self.os_fail = False
1394 def ExpandNames(self):
1395 self.needed_locks = {
1396 locking.LEVEL_NODE: locking.ALL_SET,
1397 locking.LEVEL_INSTANCE: locking.ALL_SET,
1399 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1401 def _Error(self, ecode, item, msg, *args, **kwargs):
1402 """Format an error message.
1404 Based on the opcode's error_codes parameter, either format a
1405 parseable error code, or a simpler error string.
1407 This must be called only from Exec and functions called from Exec.
1410 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1412 # first complete the msg
1415 # then format the whole message
1416 if self.op.error_codes:
1417 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1423 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1424 # and finally report it via the feedback_fn
1425 self._feedback_fn(" - %s" % msg)
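# Editor's example (hypothetical node name; assumes TNODE == "node" on an
# elided line above): reporting an ENODEHV problem yields, with error_codes
# enabled, the parseable form
#   ERROR:ENODEHV:node:node1.example.com:hypervisor verify failure
# and otherwise the plain form
#   ERROR: node node1.example.com: hypervisor verify failure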
1427 def _ErrorIf(self, cond, *args, **kwargs):
1428 """Log an error message if the passed condition is True.
1431 cond = bool(cond) or self.op.debug_simulate_errors
1433 self._Error(*args, **kwargs)
1434 # do not mark the operation as failed for WARN cases only
1435 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1436 self.bad = self.bad or cond
1438 def _VerifyNode(self, ninfo, nresult):
1439 """Perform some basic validation on data returned from a node.
1441 - check the result data structure is well formed and has all the mandatory
1443 - check ganeti version
1445 @type ninfo: L{objects.Node}
1446 @param ninfo: the node to check
1447 @param nresult: the results from the node
1449 @return: whether overall this call was successful (and we can expect
1450 reasonable values in the response)
1454 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1456 # main result, nresult should be a non-empty dict
1457 test = not nresult or not isinstance(nresult, dict)
1458 _ErrorIf(test, self.ENODERPC, node,
1459 "unable to verify node: no data returned")
1463 # compares ganeti version
1464 local_version = constants.PROTOCOL_VERSION
1465 remote_version = nresult.get("version", None)
1466 test = not (remote_version and
1467 isinstance(remote_version, (list, tuple)) and
1468 len(remote_version) == 2)
1469 _ErrorIf(test, self.ENODERPC, node,
1470 "connection to node returned invalid data")
1474 test = local_version != remote_version[0]
1475 _ErrorIf(test, self.ENODEVERSION, node,
1476 "incompatible protocol versions: master %s,"
1477 " node %s", local_version, remote_version[0])
1481 # node seems compatible, we can actually try to look into its results
1483 # full package version
1484 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1485 self.ENODEVERSION, node,
1486 "software version mismatch: master %s, node %s",
1487 constants.RELEASE_VERSION, remote_version[1],
1488 code=self.ETYPE_WARNING)
1490 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1491 if isinstance(hyp_result, dict):
1492 for hv_name, hv_result in hyp_result.iteritems():
1493 test = hv_result is not None
1494 _ErrorIf(test, self.ENODEHV, node,
1495 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1498 test = nresult.get(constants.NV_NODESETUP,
1499 ["Missing NODESETUP results"])
1500 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1505 def _VerifyNodeTime(self, ninfo, nresult,
1506 nvinfo_starttime, nvinfo_endtime):
1507 """Check the node time.
1509 @type ninfo: L{objects.Node}
1510 @param ninfo: the node to check
1511 @param nresult: the remote results for the node
1512 @param nvinfo_starttime: the start time of the RPC call
1513 @param nvinfo_endtime: the end time of the RPC call
1517 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1519 ntime = nresult.get(constants.NV_TIME, None)
1521 ntime_merged = utils.MergeTime(ntime)
1522 except (ValueError, TypeError):
1523 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1526 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1527 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1528 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1529 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1533 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1534 "Node time diverges by at least %s from master node time",
1537 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1538 """Check the node time.
1540 @type ninfo: L{objects.Node}
1541 @param ninfo: the node to check
1542 @param nresult: the remote results for the node
1543 @param vg_name: the configured VG name
1550 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1552 # checks vg existence and size > 20G
1553 vglist = nresult.get(constants.NV_VGLIST, None)
1555 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1557 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1558 constants.MIN_VG_SIZE)
1559 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1562 pvlist = nresult.get(constants.NV_PVLIST, None)
1563 test = pvlist is None
1564 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1566 # check that ':' is not present in PV names, since it's a
1567 # special character for lvcreate (denotes the range of PEs to
1569 for _, pvname, owner_vg in pvlist:
1570 test = ":" in pvname
1571 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1572 " '%s' of VG '%s'", pvname, owner_vg)
1574 def _VerifyNodeNetwork(self, ninfo, nresult):
1575 """Check the node time.
1577 @type ninfo: L{objects.Node}
1578 @param ninfo: the node to check
1579 @param nresult: the remote results for the node
1583 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1585 test = constants.NV_NODELIST not in nresult
1586 _ErrorIf(test, self.ENODESSH, node,
1587 "node hasn't returned node ssh connectivity data")
1589 if nresult[constants.NV_NODELIST]:
1590 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1591 _ErrorIf(True, self.ENODESSH, node,
1592 "ssh communication with node '%s': %s", a_node, a_msg)
1594 test = constants.NV_NODENETTEST not in nresult
1595 _ErrorIf(test, self.ENODENET, node,
1596 "node hasn't returned node tcp connectivity data")
1598 if nresult[constants.NV_NODENETTEST]:
1599 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1601 _ErrorIf(True, self.ENODENET, node,
1602 "tcp communication with node '%s': %s",
1603 anode, nresult[constants.NV_NODENETTEST][anode])
1605 test = constants.NV_MASTERIP not in nresult
1606 _ErrorIf(test, self.ENODENET, node,
1607 "node hasn't returned node master IP reachability data")
1609 if not nresult[constants.NV_MASTERIP]:
1610 if node == self.master_node:
1611 msg = "the master node cannot reach the master IP (not configured?)"
1613 msg = "cannot reach the master IP"
1614 _ErrorIf(True, self.ENODENET, node, msg)
1617 def _VerifyInstance(self, instance, instanceconfig, node_image):
1618 """Verify an instance.
1620 This function checks to see if the required block devices are
1621 available on the instance's node.
1624 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1625 node_current = instanceconfig.primary_node
1627 node_vol_should = {}
1628 instanceconfig.MapLVsByNode(node_vol_should)
1630 for node in node_vol_should:
1631 n_img = node_image[node]
1632 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1633 # ignore missing volumes on offline or broken nodes
1635 for volume in node_vol_should[node]:
1636 test = volume not in n_img.volumes
1637 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1638 "volume %s missing on node %s", volume, node)
1640 if instanceconfig.admin_up:
1641 pri_img = node_image[node_current]
1642 test = instance not in pri_img.instances and not pri_img.offline
1643 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1644 "instance not running on its primary node %s",
1647 for node, n_img in node_image.items():
1648 if node != node_current:
1649 test = instance in n_img.instances
1650 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1651 "instance should not run on node %s", node)
1653 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1654 """Verify if there are any unknown volumes in the cluster.
1656 The .os, .swap and backup volumes are ignored. All other volumes are
1657 reported as unknown.
1660 for node, n_img in node_image.items():
1661 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1662 # skip non-healthy nodes
1664 for volume in n_img.volumes:
1665 test = (node not in node_vol_should or
1666 volume not in node_vol_should[node])
1667 self._ErrorIf(test, self.ENODEORPHANLV, node,
1668 "volume %s is unknown", volume)
1670 def _VerifyOrphanInstances(self, instancelist, node_image):
1671 """Verify the list of running instances.
1673 This checks what instances are running but unknown to the cluster.
1676 for node, n_img in node_image.items():
1677 for o_inst in n_img.instances:
1678 test = o_inst not in instancelist
1679 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1680 "instance %s on node %s should not exist", o_inst, node)
1682 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1683 """Verify N+1 Memory Resilience.
1685 Check that if one single node dies we can still start all the
1686 instances it was primary for.
1689 for node, n_img in node_image.items():
1690 # This code checks that every node which is now listed as
1691 # secondary has enough memory to host all instances it is
1692 # supposed to, should a single other node in the cluster fail.
1693 # FIXME: not ready for failover to an arbitrary node
1694 # FIXME: does not support file-backed instances
1695 # WARNING: we currently take into account down instances as well
1696 # as up ones, considering that even if they're down someone
1697 # might want to start them even in the event of a node failure.
1698 for prinode, instances in n_img.sbp.items():
1700 for instance in instances:
1701 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1702 if bep[constants.BE_AUTO_BALANCE]:
1703 needed_mem += bep[constants.BE_MEMORY]
1704 test = n_img.mfree < needed_mem
1705 self._ErrorIf(test, self.ENODEN1, node,
1706 "not enough memory on to accommodate"
1707 " failovers should peer node %s fail", prinode)
1709 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1711 """Verifies and computes the node required file checksums.
1713 @type ninfo: L{objects.Node}
1714 @param ninfo: the node to check
1715 @param nresult: the remote results for the node
1716 @param file_list: required list of files
1717 @param local_cksum: dictionary of local files and their checksums
1718 @param master_files: list of files that only masters should have
1722 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1724 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1725 test = not isinstance(remote_cksum, dict)
1726 _ErrorIf(test, self.ENODEFILECHECK, node,
1727 "node hasn't returned file checksum data")
1731 for file_name in file_list:
1732 node_is_mc = ninfo.master_candidate
1733 must_have = (file_name not in master_files) or node_is_mc
1735 test1 = file_name not in remote_cksum
1737 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1739 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1740 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1741 "file '%s' missing", file_name)
1742 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1743 "file '%s' has wrong checksum", file_name)
1744 # not candidate and this is not a must-have file
1745 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1746 "file '%s' should not exist on non master"
1747 " candidates (and the file is outdated)", file_name)
1748 # all good, except non-master/non-must have combination
1749 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1750 "file '%s' should not exist"
1751 " on non master candidates", file_name)
1753 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1755 """Verifies and the node DRBD status.
1757 @type ninfo: L{objects.Node}
1758 @param ninfo: the node to check
1759 @param nresult: the remote results for the node
1760 @param instanceinfo: the dict of instances
1761 @param drbd_helper: the configured DRBD usermode helper
1762 @param drbd_map: the DRBD map as returned by
1763 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1767 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1770 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1771 test = (helper_result is None)
1772 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1773 "no drbd usermode helper returned")
1775 status, payload = helper_result
1777 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1778 "drbd usermode helper check unsuccessful: %s", payload)
1779 test = status and (payload != drbd_helper)
1780 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1781 "wrong drbd usermode helper: %s", payload)
1783 # compute the DRBD minors
1785 for minor, instance in drbd_map[node].items():
1786 test = instance not in instanceinfo
1787 _ErrorIf(test, self.ECLUSTERCFG, None,
1788 "ghost instance '%s' in temporary DRBD map", instance)
1789 # ghost instance should not be running, but otherwise we
1790 # don't give double warnings (both ghost instance and
1791 # unallocated minor in use)
1793 node_drbd[minor] = (instance, False)
1795 instance = instanceinfo[instance]
1796 node_drbd[minor] = (instance.name, instance.admin_up)
1798 # and now check them
1799 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1800 test = not isinstance(used_minors, (tuple, list))
1801 _ErrorIf(test, self.ENODEDRBD, node,
1802 "cannot parse drbd status file: %s", str(used_minors))
1804 # we cannot check drbd status
1807 for minor, (iname, must_exist) in node_drbd.items():
1808 test = minor not in used_minors and must_exist
1809 _ErrorIf(test, self.ENODEDRBD, node,
1810 "drbd minor %d of instance %s is not active", minor, iname)
1811 for minor in used_minors:
1812 test = minor not in node_drbd
1813 _ErrorIf(test, self.ENODEDRBD, node,
1814 "unallocated drbd minor %d is in use", minor)
1816 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1817 """Builds the node OS structures.
1819 @type ninfo: L{objects.Node}
1820 @param ninfo: the node to check
1821 @param nresult: the remote results for the node
1822 @param nimg: the node image object
1826 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1828 remote_os = nresult.get(constants.NV_OSLIST, None)
1829 test = (not isinstance(remote_os, list) or
1830 not compat.all(isinstance(v, list) and len(v) == 7
1831 for v in remote_os))
1833 _ErrorIf(test, self.ENODEOS, node,
1834 "node hasn't returned valid OS data")
1843 for (name, os_path, status, diagnose,
1844 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1846 if name not in os_dict:
1849 # parameters is a list of lists instead of list of tuples due to
1850 # JSON lacking a real tuple type, fix it:
1851 parameters = [tuple(v) for v in parameters]
1852 os_dict[name].append((os_path, status, diagnose,
1853 set(variants), set(parameters), set(api_ver)))
1855 nimg.oslist = os_dict
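# Editor's sketch of the resulting nimg.oslist shape (hypothetical values,
# including the OS path): OS name -> list of (path, status, diagnose,
# variants, parameters, api versions) tuples, one per occurrence on the node:
#
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([10, 15]))]}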
1857 def _VerifyNodeOS(self, ninfo, nimg, base):
1858 """Verifies the node OS list.
1860 @type ninfo: L{objects.Node}
1861 @param ninfo: the node to check
1862 @param nimg: the node image object
1863 @param base: the 'template' node we match against (e.g. from the master)
1867 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1869 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1871 for os_name, os_data in nimg.oslist.items():
1872 assert os_data, "Empty OS status for OS %s?!" % os_name
1873 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1874 _ErrorIf(not f_status, self.ENODEOS, node,
1875 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1876 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1877 "OS '%s' has multiple entries (first one shadows the rest): %s",
1878 os_name, utils.CommaJoin([v[0] for v in os_data]))
1879 # this will be caught in the backend too
1880 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1881 and not f_var, self.ENODEOS, node,
1882 "OS %s with API at least %d does not declare any variant",
1883 os_name, constants.OS_API_V15)
1884 # comparisons with the 'base' image
1885 test = os_name not in base.oslist
1886 _ErrorIf(test, self.ENODEOS, node,
1887 "Extra OS %s not present on reference node (%s)",
1891 assert base.oslist[os_name], "Base node has empty OS status?"
1892 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1894 # base OS is invalid, skipping
1896 for kind, a, b in [("API version", f_api, b_api),
1897 ("variants list", f_var, b_var),
1898 ("parameters", f_param, b_param)]:
1899 _ErrorIf(a != b, self.ENODEOS, node,
1900 "OS %s %s differs from reference node %s: %s vs. %s",
1901 kind, os_name, base.name,
1902 utils.CommaJoin(a), utils.CommaJoin(b))
1904 # check any missing OSes
1905 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1906 _ErrorIf(missing, self.ENODEOS, node,
1907 "OSes present on reference node %s but missing on this node: %s",
1908 base.name, utils.CommaJoin(missing))
1910 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1911 """Verifies and updates the node volume data.
1913 This function will update a L{NodeImage}'s internal structures
1914 with data from the remote call.
1916 @type ninfo: L{objects.Node}
1917 @param ninfo: the node to check
1918 @param nresult: the remote results for the node
1919 @param nimg: the node image object
1920 @param vg_name: the configured VG name
1924 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1926 nimg.lvm_fail = True
1927 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1930 elif isinstance(lvdata, basestring):
1931 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1932 utils.SafeEncode(lvdata))
1933 elif not isinstance(lvdata, dict):
1934 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1936 nimg.volumes = lvdata
1937 nimg.lvm_fail = False
1939 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1940 """Verifies and updates the node instance list.
1942 If the listing was successful, then updates this node's instance
1943 list. Otherwise, it marks the RPC call as failed for the instance
1946 @type ninfo: L{objects.Node}
1947 @param ninfo: the node to check
1948 @param nresult: the remote results for the node
1949 @param nimg: the node image object
1952 idata = nresult.get(constants.NV_INSTANCELIST, None)
1953 test = not isinstance(idata, list)
1954 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1955 " (instancelist): %s", utils.SafeEncode(str(idata)))
1957 nimg.hyp_fail = True
1959 nimg.instances = idata
1961 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1962 """Verifies and computes a node information map
1964 @type ninfo: L{objects.Node}
1965 @param ninfo: the node to check
1966 @param nresult: the remote results for the node
1967 @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
1972 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1974 # try to read free memory (from the hypervisor)
1975 hv_info = nresult.get(constants.NV_HVINFO, None)
1976 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1977 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")
1985 # FIXME: devise a free space model for file based instances as well
1986 if vg_name is not None:
1987 test = (constants.NV_VGLIST not in nresult or
1988 vg_name not in nresult[constants.NV_VGLIST])
1989 _ErrorIf(test, self.ENODELVM, node,
1990 "node didn't return data for the volume group '%s'"
1991 " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; when they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
2010 for node in self.cfg.GetAllNodesInfo().values():
2011 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2013 return env, [], all_nodes
2015 def Exec(self, feedback_fn):
2016 """Verify integrity of cluster, performing various test on nodes.
2020 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2021 verbose = self.op.verbose
2022 self._feedback_fn = feedback_fn
2023 feedback_fn("* Verifying global settings")
2024 for msg in self.cfg.VerifyConfig():
2025 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2027 # Check the cluster certificates
2028 for cert_filename in constants.ALL_CERT_FILES:
2029 (errcode, msg) = _VerifyCertificate(cert_filename)
2030 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2032 vg_name = self.cfg.GetVGName()
2033 drbd_helper = self.cfg.GetDRBDHelper()
2034 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2035 cluster = self.cfg.GetClusterInfo()
2036 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2037 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2038 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2039 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2040 for iname in instancelist)
2041 i_non_redundant = [] # Non redundant instances
2042 i_non_a_balanced = [] # Non auto-balanced instances
2043 n_offline = 0 # Count of offline nodes
2044 n_drained = 0 # Count of nodes being drained
2045 node_vol_should = {}
2047 # FIXME: verify OS list
2048 # do local checksums
2049 master_files = [constants.CLUSTER_CONF_FILE]
2050 master_node = self.master_node = self.cfg.GetMasterNode()
2051 master_ip = self.cfg.GetMasterIP()
2053 file_names = ssconf.SimpleStore().GetFileList()
2054 file_names.extend(constants.ALL_CERT_FILES)
2055 file_names.extend(master_files)
2056 if cluster.modify_etc_hosts:
2057 file_names.append(constants.ETC_HOSTS)
2059 local_checksums = utils.FingerprintFiles(file_names)
2061 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2062 node_verify_param = {
2063 constants.NV_FILELIST: file_names,
2064 constants.NV_NODELIST: [node.name for node in nodeinfo
2065 if not node.offline],
2066 constants.NV_HYPERVISOR: hypervisors,
2067 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2068 node.secondary_ip) for node in nodeinfo
2069 if not node.offline],
2070 constants.NV_INSTANCELIST: hypervisors,
2071 constants.NV_VERSION: None,
2072 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2073 constants.NV_NODESETUP: None,
2074 constants.NV_TIME: None,
2075 constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      }

2079 if vg_name is not None:
2080 node_verify_param[constants.NV_VGLIST] = None
2081 node_verify_param[constants.NV_LVLIST] = vg_name
2082 node_verify_param[constants.NV_PVLIST] = [vg_name]
2083 node_verify_param[constants.NV_DRBDLIST] = None
    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
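
    # node_verify_param is the single request dict sent to every node through
    # the node_verify RPC below; each NV_* key selects one remote check (e.g.
    # NV_FILELIST requests checksums of the listed files, later compared
    # against local_checksums).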
2088 # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name))
                      for node in nodeinfo)
2093 for instance in instancelist:
2094 inst_config = instanceinfo[instance]
2096 for nname in inst_config.all_nodes:
2097 if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode
2103 inst_config.MapLVsByNode(node_vol_should)
2105 pnode = inst_config.primary_node
2106 node_image[pnode].pinst.append(instance)
2108 for snode in inst_config.secondary_nodes:
2109 nimg = node_image[snode]
2110 nimg.sinst.append(instance)
2111 if pnode not in nimg.sbp:
2112 nimg.sbp[pnode] = []
2113 nimg.sbp[pnode].append(instance)
2115 # At this point, we have the in-memory data structures complete,
2116 # except for the runtime information, which we'll gather next
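
    # A purely hypothetical example: if inst1 and inst2 both have node1 as
    # primary and node2 as secondary, node_image["node2"] now has pinst == [],
    # sinst == ["inst1", "inst2"] and sbp == {"node1": ["inst1", "inst2"]}.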
2118 # Due to the way our RPC system works, exact response times cannot be
2119 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
2122 nvinfo_starttime = time.time()
2123 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2124 self.cfg.GetClusterName())
2125 nvinfo_endtime = time.time()
2127 all_drbd_map = self.cfg.ComputeDRBDMap()
2129 feedback_fn("* Verifying node status")
    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2155 msg = all_nvinfo[node].fail_msg
2156 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue
2161 nresult = all_nvinfo[node].payload
2163 nimg.call_ok = self._VerifyNode(node_i, nresult)
2164 self._VerifyNodeNetwork(node_i, nresult)
2165 self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                           all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2172 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2173 self._UpdateNodeInstances(node_i, nresult, nimg)
2174 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2175 self._UpdateNodeOS(node_i, nresult, nimg)
2176 if not nimg.os_fail:
        if refos_img is None:
          refos_img = nimg
        self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
2185 inst_config = instanceinfo[instance]
2186 self._VerifyInstance(instance, inst_config, node_image)
2187 inst_nodes_offline = []
2189 pnode = inst_config.primary_node
2190 pnode_img = node_image[pnode]
2191 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2192 self.ENODERPC, pnode, "instance %s, connection to"
2193 " primary node failed", instance)
2195 if pnode_img.offline:
2196 inst_nodes_offline.append(pnode)
2198 # If the instance is non-redundant we cannot survive losing its primary
2199 # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
2202 # FIXME: does not support file-backed instances
2203 if not inst_config.secondary_nodes:
2204 i_non_redundant.append(instance)
2205 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2206 instance, "instance has multiple secondary nodes: %s",
2207 utils.CommaJoin(inst_config.secondary_nodes),
2208 code=self.ETYPE_WARNING)
2210 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2211 i_non_a_balanced.append(instance)
2213 for snode in inst_config.secondary_nodes:
2214 s_img = node_image[snode]
2215 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2216 "instance %s, connection to secondary node failed", instance)
        if s_img.offline:
          inst_nodes_offline.append(snode)
2221 # warn that the instance lives on offline nodes
2222 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2223 "instance lives on offline node(s) %s",
2224 utils.CommaJoin(inst_nodes_offline))
2225 # ... or ghost nodes
2226 for node in inst_config.all_nodes:
2227 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2228 "instance lives on ghost node %s", node)
2230 feedback_fn("* Verifying orphan volumes")
2231 self._VerifyOrphanVolumes(node_vol_should, node_image)
2233 feedback_fn("* Verifying orphan instances")
2234 self._VerifyOrphanInstances(instancelist, node_image)
2236 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2237 feedback_fn("* Verifying N+1 Memory redundancy")
2238 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2240 feedback_fn("* Other Notes")
2242 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2243 % len(i_non_redundant))
2245 if i_non_a_balanced:
2246 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2247 % len(i_non_a_balanced))
    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
2257 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2258 """Analyze the post-hooks' result
2260 This method analyses the hook result, handles it, and sends some
2261 nicely-formatted feedback back to the user.
2263 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2264 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2265 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
2274 if phase == constants.HOOKS_PHASE_POST:
2275 # Used to change hooks' output to proper indentation
2276 indent_re = re.compile('^', re.M)
2277 feedback_fn("* Hooks Results")
2278 assert hooks_results, "invalid result from hooks"
2280 for node_name in hooks_results:
2281 res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
2284 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2285 "Communication failure in hooks execution: %s", msg)
2286 if res.offline or msg:
2287 # No need to investigate payload if node is offline or gave an error.
2288 # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
2292 for script, hkr, output in res.payload:
2293 test = hkr == constants.HKR_FAIL
2294 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2295 "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result
2304 class LUVerifyDisks(NoHooksLU):
2305 """Verifies the cluster disks status.
2310 def ExpandNames(self):
2311 self.needed_locks = {
2312 locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      }
2315 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2317 def Exec(self, feedback_fn):
2318 """Verify integrity of cluster disks.
2320 @rtype: tuple of three items
2321 @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
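    # Illustrative (hypothetical) shape of the result:
    #   ({"node3": "rpc error"},                  # per-node errors
    #    ["inst1"],                               # need activate-disks
    #    {"inst2": [("node1", "xenvg/disk0")]})   # missing volumes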
2326 result = res_nodes, res_instances, res_missing = {}, [], {}
2328 vg_name = self.cfg.GetVGName()
2329 nodes = utils.NiceSort(self.cfg.GetNodeList())
2330 instances = [self.cfg.GetInstanceInfo(name)
2331 for name in self.cfg.GetInstanceList()]
    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): inst}
2341 for node, vol_list in inst_lvs.iteritems():
2342 for vol in vol_list:
2343 nv_dict[(node, vol)] = inst
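          # e.g. nv_dict might now map ("node1", "xenvg/disk0") to the
          # instance object owning that LV (names purely hypothetical)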
    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
2361 lvs = node_res.payload
2362 for lv_name, (_, _, lv_online) in lvs.items():
2363 inst = nv_dict.pop((node, lv_name), None)
2364 if (not lv_online and inst is not None
2365 and inst.name not in res_instances):
2366 res_instances.append(inst.name)
    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2385 def ExpandNames(self):
2386 if self.op.instances:
2387 self.wanted_names = []
2388 for name in self.op.instances:
2389 full_name = _ExpandInstanceName(self.cfg, name)
2390 self.wanted_names.append(full_name)
2391 self.needed_locks = {
2392 locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
2398 self.needed_locks = {
2399 locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
2402 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2404 def DeclareLocks(self, level):
2405 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2406 self._LockInstancesNodes(primary_only=True)
2408 def CheckPrereq(self):
2409 """Check prerequisites.
    This only checks the optional instance list against the existing names.

    """
2414 if self.wanted_names is None:
2415 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2417 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2418 in self.wanted_names]
2420 def _EnsureChildSizes(self, disk):
2421 """Ensure children of the disk have the needed disk size.
2423 This is valid mainly for DRBD8 and fixes an issue where the
    children have a smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
2429 if disk.dev_type == constants.LD_DRBD8:
2430 assert disk.children, "Empty children for DRBD8?"
2431 fchild = disk.children[0]
2432 mismatch = fchild.size < disk.size
2434 self.LogInfo("Child disk has size %d, parent %d, fixing",
2435 fchild.size, disk.size)
2436 fchild.size = disk.size
2438 # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
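
  # (Exec below calls _EnsureChildSizes for every disk of the selected
  # instances and uses the boolean result to decide whether the configuration
  # needs another update.)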
2443 def Exec(self, feedback_fn):
2444 """Verify the size of cluster disks.
2447 # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
2450 for instance in self.wanted_instances:
2451 pnode = instance.primary_node
2452 if pnode not in per_node_disks:
2453 per_node_disks[pnode] = []
2454 for idx, disk in enumerate(instance.disks):
2455 per_node_disks[pnode].append((instance, idx, disk))
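
    # per_node_disks now maps each primary node to (instance, disk index,
    # disk) tuples, e.g. {"node1": [(inst1, 0, disk0)]} (hypothetical names)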
    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
2473 self.LogWarning("Disk %d of instance %s did not return size"
2474 " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
2494 class LURenameCluster(LogicalUnit):
2495 """Rename the cluster.
2498 HPATH = "cluster-rename"
2499 HTYPE = constants.HTYPE_CLUSTER
2500 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2502 def BuildHooksEnv(self):
2507 "OP_TARGET": self.cfg.GetClusterName(),
2508 "NEW_NAME": self.op.name,
2510 mn = self.cfg.GetMasterNode()
2511 all_nodes = self.cfg.GetNodeList()
2512 return env, [mn], all_nodes
2514 def CheckPrereq(self):
2515 """Verify that the passed name is a valid one.
2518 hostname = netutils.GetHostInfo(self.op.name)
2520 new_name = hostname.name
2521 self.ip = new_ip = hostname.ip
2522 old_name = self.cfg.GetClusterName()
2523 old_ip = self.cfg.GetMasterIP()
2524 if new_name == old_name and new_ip == old_ip:
2525 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2526 " cluster has changed",
2528 if new_ip != old_ip:
2529 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2530 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2531 " reachable on the network. Aborting." %
2532 new_ip, errors.ECODE_NOTUNIQUE)
2534 self.op.name = new_name
2536 def Exec(self, feedback_fn):
2537 """Rename the cluster.
2540 clustername = self.op.name
2543 # shutdown the master IP
2544 master = self.cfg.GetMasterNode()
2545 result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
2550 cluster.cluster_name = clustername
2551 cluster.master_ip = ip
2552 self.cfg.Update(cluster, feedback_fn)
2554 # update the known hosts file
2555 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2556 node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
2578 class LUSetClusterParams(LogicalUnit):
2579 """Change the parameters of the cluster.
2582 HPATH = "cluster-modify"
2583 HTYPE = constants.HTYPE_CLUSTER
2585 ("vg_name", None, _TMaybeString),
2586 ("enabled_hypervisors", None,
2587 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2588 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2589 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2590 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2591 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2592 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2593 ("uid_pool", None, _NoType),
2594 ("add_uids", None, _NoType),
2595 ("remove_uids", None, _NoType),
2596 ("maintain_node_health", None, _TMaybeBool),
2597 ("nicparams", None, _TOr(_TDict, _TNone)),
2598 ("drbd_helper", None, _TOr(_TString, _TNone)),
2599 ("default_iallocator", None, _TMaybeString),
2603 def CheckArguments(self):
2607 if self.op.uid_pool:
2608 uidpool.CheckUidPool(self.op.uid_pool)
2610 if self.op.add_uids:
2611 uidpool.CheckUidPool(self.op.add_uids)
2613 if self.op.remove_uids:
2614 uidpool.CheckUidPool(self.op.remove_uids)
2616 def ExpandNames(self):
2617 # FIXME: in the future maybe other cluster params won't require checking on
2618 # all nodes to be modified.
2619 self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
2632 mn = self.cfg.GetMasterNode()
2633 return env, [mn], [mn]
2635 def CheckPrereq(self):
2636 """Check prerequisites.
2638 This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
2642 if self.op.vg_name is not None and not self.op.vg_name:
2643 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2644 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2645 " instances exist", errors.ECODE_INVAL)
2647 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2648 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2649 raise errors.OpPrereqError("Cannot disable drbd helper while"
2650 " drbd-based instances exist",
2653 node_list = self.acquired_locks[locking.LEVEL_NODE]
    # if vg_name is not None, check the given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2672 if self.op.drbd_helper:
2673 # checks given drbd helper on all nodes
2674 helpers = self.rpc.call_drbd_helper(node_list)
2675 for node in node_list:
2676 ninfo = self.cfg.GetNodeInfo(node)
2678 self.LogInfo("Not checking drbd helper on offline node %s", node)
2680 msg = helpers[node].fail_msg
2682 raise errors.OpPrereqError("Error checking drbd helper on node"
2683 " '%s': %s" % (node, msg),
2684 errors.ECODE_ENVIRON)
2685 node_helper = helpers[node].payload
2686 if node_helper != self.op.drbd_helper:
2687 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2688 (node, node_helper), errors.ECODE_ENVIRON)
2690 self.cluster = cluster = self.cfg.GetClusterInfo()
2691 # validate params changes
2692 if self.op.beparams:
2693 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2694 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2696 if self.op.nicparams:
2697 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2698 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2699 objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
2703 for instance in self.cfg.GetAllInstancesInfo().values():
2704 for nic_idx, nic in enumerate(instance.nics):
2705 params_copy = copy.deepcopy(nic.nicparams)
2706 params_filled = objects.FillDict(self.new_nicparams, params_copy)
          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
2711 except errors.ConfigurationError, err:
2712 nic_errors.append("Instance %s, nic/%d: %s" %
2713 (instance.name, nic_idx, err))
2715 # if we're moving instances to routed, check that they have an ip
2716 target_mode = params_filled[constants.NIC_MODE]
2717 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2718 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2719 (instance.name, nic_idx))
2721 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2722 "\n".join(nic_errors))
2724 # hypervisor list/parameters
2725 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2726 if self.op.hvparams:
2727 for hv_name, hv_dict in self.op.hvparams.items():
2728 if hv_name not in self.new_hvparams:
2729 self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2747 self.new_osp = objects.FillDict(cluster.osparams, {})
2748 if self.op.osparams:
2749 for os_name, osp in self.op.osparams.items():
2750 if os_name not in self.new_osp:
2751 self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
2761 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2762 os_name, self.new_osp[os_name])
2764 # changes to the hypervisor list
2765 if self.op.enabled_hypervisors is not None:
2766 self.hv_list = self.op.enabled_hypervisors
2767 for hv in self.hv_list:
2768 # if the hypervisor doesn't already exist in the cluster
2769 # hvparams, we initialize it to empty, and then (in both
2770 # cases) we make sure to fill the defaults, as we might not
2771 # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
2775 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2776 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
2780 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2781 # either the enabled list has changed, or the parameters have, validate
2782 for hv_name, hv_params in self.new_hvparams.items():
2783 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2784 (self.op.enabled_hypervisors and
2785 hv_name in self.op.enabled_hypervisors)):
2786 # either this is a new hypervisor, or its parameters have changed
2787 hv_class = hypervisor.GetHypervisor(hv_name)
2788 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2789 hv_class.CheckParameterSyntax(hv_params)
2790 _CheckHVParams(self, node_list, hv_name, hv_params)
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
2794 # defaults have already been checked in the above code-block
2795 for os_name, os_hvp in self.new_os_hvp.items():
2796 for hv_name, hv_params in os_hvp.items():
2797 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2798 # we need to fill in the new os_hvp on top of the actual hv_p
2799 cluster_defaults = self.new_hvparams.get(hv_name, {})
2800 new_osp = objects.FillDict(cluster_defaults, hv_params)
2801 hv_class = hypervisor.GetHypervisor(hv_name)
2802 hv_class.CheckParameterSyntax(new_osp)
2803 _CheckHVParams(self, node_list, hv_name, new_osp)
2805 if self.op.default_iallocator:
2806 alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
2826 " state, not changing")
2827 if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
2840 if self.op.enabled_hypervisors is not None:
2841 self.cluster.hvparams = self.new_hvparams
2842 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2843 if self.op.beparams:
2844 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2845 if self.op.nicparams:
2846 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2847 if self.op.osparams:
2848 self.cluster.osparams = self.new_osp
2850 if self.op.candidate_pool_size is not None:
2851 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2852 # we need to update the pool size here, otherwise the save will fail
2853 _AdjustCandidatePool(self, [])
2855 if self.op.maintain_node_health is not None:
2856 self.cluster.maintain_node_health = self.op.maintain_node_health
2858 if self.op.add_uids is not None:
2859 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2861 if self.op.remove_uids is not None:
2862 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2864 if self.op.uid_pool is not None:
2865 self.cluster.uid_pool = self.op.uid_pool
2867 if self.op.default_iallocator is not None:
2868 self.cluster.default_iallocator = self.op.default_iallocator
2870 self.cfg.Update(self.cluster, feedback_fn)
2873 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2874 """Distribute additional files which are part of the cluster configuration.
2876 ConfigWriter takes care of distributing the config and ssconf files, but
2877 there are more files which should be distributed to all nodes. This function
2878 makes sure those are copied.
2880 @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
2884 # 1. Gather target nodes
2885 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2886 dist_nodes = lu.cfg.GetOnlineNodeList()
2887 if additional_nodes is not None:
2888 dist_nodes.extend(additional_nodes)
2889 if myself.name in dist_nodes:
2890 dist_nodes.remove(myself.name)
2892 # 2. Gather files to distribute
2893 dist_files = set([constants.ETC_HOSTS,
2894 constants.SSH_KNOWN_HOSTS_FILE,
2895 constants.RAPI_CERT_FILE,
2896 constants.RAPI_USERS_FILE,
2897 constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                   ])
2901 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2902 for hv_name in enabled_hypervisors:
2903 hv_class = hypervisor.GetHypervisor(hv_name)
2904 dist_files.update(hv_class.GetAncillaryFiles())
2906 # 3. Perform the files upload
2907 for fname in dist_files:
2908 if os.path.exists(fname):
2909 result = lu.rpc.call_upload_file(dist_nodes, fname)
2910 for to_node, to_result in result.items():
2911 msg = to_result.fail_msg
2913 msg = ("Copy of file %s to node %s failed: %s" %
2914 (fname, to_node, msg))
2915 lu.proc.LogWarning(msg)
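
# A minimal usage sketch (hypothetical caller): an LU that changed one of the
# distributed files would simply call
#   _RedistributeAncillaryFiles(self)
# while node addition passes the not-yet-configured node explicitly:
#   _RedistributeAncillaryFiles(self, additional_nodes=[new_node_name])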
2918 class LURedistributeConfig(NoHooksLU):
2919 """Force the redistribution of cluster configuration.
  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
2936 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2937 _RedistributeAncillaryFiles(self)
2940 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2941 """Sleep and poll for an instance's disk to sync.
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
2965 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
2977 for i, mstat in enumerate(rstats):
2979 lu.LogWarning("Can't compute data for node %s/%s",
2980 node, disks[i].iv_name)
2983 cumul_degraded = (cumul_degraded or
2984 (mstat.is_degraded and mstat.sync_percent is None))
2985 if mstat.sync_percent is not None:
2987 if mstat.estimated_time is not None:
2988 rem_time = ("%s remaining (estimated)" %
2989 utils.FormatSeconds(mstat.estimated_time))
2990 max_time = mstat.estimated_time
2992 rem_time = "no time estimate"
2993 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2994 (disks[i].iv_name, mstat.sync_percent, rem_time))
2996 # if we're done but degraded, let's do a few small retries, to
2997 # make sure we see a stable and not transient situation; therefore
2998 # we force restart of the loop
2999 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3000 logging.info("Degraded disks found, %d retries left", degr_retries)
3008 time.sleep(min(60, max_time))
3011 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3012 return not cumul_degraded
3015 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3016 """Check that mirrors are not degraded.
3018 The ldisk parameter, if True, will change the test from the
3019 is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

3027 if on_primary or dev.AssembleOnSecondary():
3028 rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
3049 class LUDiagnoseOS(NoHooksLU):
3050 """Logical unit for OS diagnose/query.
3055 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3058 _FIELDS_STATIC = utils.FieldSet()
3059 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
3060 "parameters", "api_versions")
3062 def CheckArguments(self):
3064 raise errors.OpPrereqError("Selective OS query not supported",
3067 _CheckOutputFields(static=self._FIELDS_STATIC,
3068 dynamic=self._FIELDS_DYNAMIC,
3069 selected=self.op.output_fields)
3071 def ExpandNames(self):
3072 # Lock all nodes, in shared mode
3073 # Temporary removal of locks, should be reverted later
3074 # TODO: reintroduce locks when they are lighter-weight
3075 self.needed_locks = {}
3076 #self.share_locks[locking.LEVEL_NODE] = 1
3077 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  @staticmethod
  def _DiagnoseByOS(rlist):
3081 """Remaps a per-node return list into an a per-os per-node dictionary
3083 @param rlist: a map with node names as keys and OS objects as values
3086 @return: a dictionary with osnames as keys and as value another
3087 map, with nodes as keys and tuples of (path, status, diagnose,
3088 variants, parameters, api_versions) as values, eg::
3090 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3091 (/srv/..., False, "invalid api")],
3092 "node2": [(/srv/..., True, "", [], [])]}
3097 # we build here the list of nodes that didn't fail the RPC (at RPC
3098 # level), so that nodes with a non-responding node daemon don't
3099 # make all OSes invalid
3100 good_nodes = [node_name for node_name in rlist
3101 if not rlist[node_name].fail_msg]
3102 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
3105 for (name, path, status, diagnose, variants,
3106 params, api_versions) in nr.payload:
3107 if name not in all_os:
3108 # build a list of nodes for this os containing empty lists
        # for each node in node_list
        all_os[name] = {}
3111 for nname in good_nodes:
3112 all_os[name][nname] = []
3113 # convert params from [name, help] to (name, help)
3114 params = [tuple(v) for v in params]
3115 all_os[name][node_name].append((path, status, diagnose,
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = self.cfg.GetOnlineNodeList()
3124 node_data = self.rpc.call_os_diagnose(valid_nodes)
3125 pol = self._DiagnoseByOS(node_data)
    output = []

    for os_name, os_data in pol.items():
      row = []
      valid = True
3131 (variants, params, api_versions) = null_state = (set(), set(), set())
3132 for idx, osl in enumerate(os_data.values()):
3133 valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
3137 node_variants, node_params, node_api = osl[0][3:6]
3138 if idx == 0: # first entry
3139 variants = set(node_variants)
3140 params = set(node_params)
3141 api_versions = set(node_api)
3142 else: # keep consistency
3143 variants.intersection_update(node_variants)
3144 params.intersection_update(node_params)
3145 api_versions.intersection_update(node_api)
      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
3171 class LURemoveNode(LogicalUnit):
3172 """Logical unit for removing a node.
3175 HPATH = "node-remove"
3176 HTYPE = constants.HTYPE_NODE
  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
3190 "NODE_NAME": self.op.node_name,
3192 all_nodes = self.cfg.GetNodeList()
3194 all_nodes.remove(self.op.node_name)
3196 logging.warning("Node %s which is about to be removed not found"
3197 " in the all nodes list", self.op.node_name)
3198 return env, all_nodes, all_nodes
3200 def CheckPrereq(self):
3201 """Check prerequisites.
    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
3211 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3212 node = self.cfg.GetNodeInfo(self.op.node_name)
3213 assert node is not None
3215 instance_list = self.cfg.GetInstanceList()
3217 masternode = self.cfg.GetMasterNode()
3218 if node.name == masternode:
3219 raise errors.OpPrereqError("Node is the master node,"
3220 " you need to failover first.",
3223 for instance_name in instance_list:
3224 instance = self.cfg.GetInstanceInfo(instance_name)
3225 if node.name in instance.all_nodes:
3226 raise errors.OpPrereqError("Instance %s is still running on the node,"
3227 " please remove first." % instance_name,
3229 self.op.node_name = node.name
3232 def Exec(self, feedback_fn):
3233 """Removes the node from the cluster.
3237 logging.info("Stopping the node daemon and removing configs from node %s",
3240 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3242 # Promote nodes to master candidate as needed
3243 _AdjustCandidatePool(self, exceptions=[node.name])
3244 self.context.RemoveNode(node.name)
3246 # Run post hooks on the node before it's removed
3247 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
3251 # pylint: disable-msg=W0702
3252 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3254 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3255 msg = result.fail_msg
3257 self.LogWarning("Errors encountered on the remote node while leaving"
3258 " the cluster: %s", msg)
3260 # Remove node from our /etc/hosts
3261 if self.cfg.GetClusterInfo().modify_etc_hosts:
3262 # FIXME: this should be done via an rpc call to node daemon
3263 utils.RemoveHostFromEtcHosts(node.name)
3264 _RedistributeAncillaryFiles(self)
3267 class LUQueryNodes(NoHooksLU):
3268 """Logical unit for querying nodes.
3271 # pylint: disable-msg=W0142
3274 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3275 ("use_locking", False, _TBool),
3279 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3280 "master_candidate", "offline", "drained"]
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )
3289 _FIELDS_STATIC = utils.FieldSet(*[
3290 "pinst_cnt", "sinst_cnt",
3291 "pinst_list", "sinst_list",
3292 "pip", "sip", "tags",
3294 "role"] + _SIMPLE_FIELDS
3297 def CheckArguments(self):
3298 _CheckOutputFields(static=self._FIELDS_STATIC,
3299 dynamic=self._FIELDS_DYNAMIC,
3300 selected=self.op.output_fields)
3302 def ExpandNames(self):
3303 self.needed_locks = {}
3304 self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET
3311 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3312 self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
3315 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3317 def Exec(self, feedback_fn):
3318 """Computes the list of nodes and their attributes.
3321 all_info = self.cfg.GetAllNodesInfo()
3323 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3324 elif self.wanted != locking.ALL_SET:
3325 nodenames = self.wanted
3326 missing = set(nodenames).difference(all_info.keys())
3328 raise errors.OpExecError(
3329 "Some nodes were removed before retrieving their data: %s" % missing)
3331 nodenames = all_info.keys()
3333 nodenames = utils.NiceSort(nodenames)
3334 nodelist = [all_info[name] for name in nodenames]
3336 # begin data gathering
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
3348 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3349 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3350 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3351 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3352 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3353 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3354 "bootid": nodeinfo.get('bootid', None),
3355 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3356 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3359 live_data[name] = {}
3361 live_data = dict.fromkeys(nodenames, {})
3363 node_to_primary = dict([(name, set()) for name in nodenames])
3364 node_to_secondary = dict([(name, set()) for name in nodenames])
3366 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3367 "sinst_cnt", "sinst_list"))
3368 if inst_fields & frozenset(self.op.output_fields):
3369 inst_data = self.cfg.GetAllInstancesInfo()
3371 for inst in inst_data.values():
3372 if inst.primary_node in node_to_primary:
3373 node_to_primary[inst.primary_node].add(inst.name)
3374 for secnode in inst.secondary_nodes:
3375 if secnode in node_to_secondary:
3376 node_to_secondary[secnode].add(inst.name)
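
    # e.g. node_to_primary could now be {"node1": set(["inst1"])} and
    # node_to_secondary {"node2": set(["inst1"])} (hypothetical names)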
3378 master_node = self.cfg.GetMasterNode()
3380 # end data gathering
    output = []
    for node in nodelist:
      node_output = []
3385 for field in self.op.output_fields:
3386 if field in self._SIMPLE_FIELDS:
3387 val = getattr(node, field)
3388 elif field == "pinst_list":
3389 val = list(node_to_primary[node.name])
3390 elif field == "sinst_list":
3391 val = list(node_to_secondary[node.name])
3392 elif field == "pinst_cnt":
3393 val = len(node_to_primary[node.name])
3394 elif field == "sinst_cnt":
3395 val = len(node_to_secondary[node.name])
3396 elif field == "pip":
3397 val = node.primary_ip
3398 elif field == "sip":
3399 val = node.secondary_ip
3400 elif field == "tags":
3401 val = list(node.GetTags())
3402 elif field == "master":
3403 val = node.name == master_node
3404 elif self._FIELDS_DYNAMIC.Matches(field):
3405 val = live_data[node.name].get(field, None)
3406 elif field == "role":
          if node.name == master_node:
            val = "master"
          elif node.master_candidate:
            val = "master-candidate"
          elif node.drained:
            val = "drained"
          elif node.offline:
            val = "offline"
          else:
            val = "regular"
        else:
          raise errors.ParameterError(field)
3419 node_output.append(val)
      output.append(node_output)

    return output
3425 class LUQueryNodeVolumes(NoHooksLU):
3426 """Logical unit for getting volumes on node(s).
3430 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3431 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3434 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3435 _FIELDS_STATIC = utils.FieldSet("node")
3437 def CheckArguments(self):
3438 _CheckOutputFields(static=self._FIELDS_STATIC,
3439 dynamic=self._FIELDS_DYNAMIC,
3440 selected=self.op.output_fields)
3442 def ExpandNames(self):
3443 self.needed_locks = {}
3444 self.share_locks[locking.LEVEL_NODE] = 1
3445 if not self.op.nodes:
3446 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3448 self.needed_locks[locking.LEVEL_NODE] = \
3449 _GetWantedNodes(self, self.op.nodes)
3451 def Exec(self, feedback_fn):
3452 """Computes the list of nodes and their attributes.
3455 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3456 volumes = self.rpc.call_node_volumes(nodenames)
3458 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3459 in self.cfg.GetInstanceList()]
3461 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue
3473 node_vols = nresult.payload[:]
3474 node_vols.sort(key=lambda vol: vol['dev'])
      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                break
            else:
              inst = None
            val = inst and inst.name
          else:
            raise errors.ParameterError(field)
3500 node_output.append(str(val))
        output.append(node_output)

    return output
3507 class LUQueryNodeStorage(NoHooksLU):
3508 """Logical unit for getting information on storage units on node(s).
3511 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3513 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3514 ("storage_type", _NoDefault, _CheckStorageType),
3515 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3516 ("name", None, _TMaybeString),
3520 def CheckArguments(self):
3521 _CheckOutputFields(static=self._FIELDS_STATIC,
3522 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3523 selected=self.op.output_fields)
3525 def ExpandNames(self):
3526 self.needed_locks = {}
3527 self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3535 def Exec(self, feedback_fn):
3536 """Computes the list of nodes and their attributes.
3539 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3541 # Always get name to sort by
3542 if constants.SF_NAME in self.op.output_fields:
3543 fields = self.op.output_fields[:]
3545 fields = [constants.SF_NAME] + self.op.output_fields
3547 # Never ask for node or type as it's only known to the LU
3548 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3549 while extra in fields:
3550 fields.remove(extra)
3552 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3553 name_idx = field_idx[constants.SF_NAME]
3555 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3556 data = self.rpc.call_storage_list(self.nodes,
3557 self.op.storage_type, st_args,
                                      self.op.name, fields)
    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue
3572 rows = dict([(row[name_idx], row) for row in nresult.payload])
      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)
          out.append(val)
        result.append(out)

    return result
3596 class LUModifyNodeStorage(NoHooksLU):
3597 """Logical unit for modifying a storage volume on a node.
3602 ("storage_type", _NoDefault, _CheckStorageType),
3603 ("name", _NoDefault, _TNonEmptyString),
3604 ("changes", _NoDefault, _TDict),
3608 def CheckArguments(self):
3609 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3611 storage_type = self.op.storage_type
    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)
3627 def ExpandNames(self):
3628 self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }
3632 def Exec(self, feedback_fn):
3633 """Computes the list of nodes and their attributes.
3636 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3637 result = self.rpc.call_storage_modify(self.op.node_name,
3638 self.op.storage_type, st_args,
3639 self.op.name, self.op.changes)
3640 result.Raise("Failed to modify storage unit '%s' on %s" %
3641 (self.op.name, self.op.node_name))
3644 class LUAddNode(LogicalUnit):
3645 """Logical unit for adding node to the cluster.
3649 HTYPE = constants.HTYPE_NODE
3652 ("primary_ip", None, _NoType),
3653 ("secondary_ip", None, _TMaybeString),
3654 ("readd", False, _TBool),
3657 def CheckArguments(self):
3658 # validate/normalize the node name
3659 self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
3669 "NODE_NAME": self.op.node_name,
3670 "NODE_PIP": self.op.primary_ip,
3671 "NODE_SIP": self.op.secondary_ip,
3673 nodes_0 = self.cfg.GetNodeList()
3674 nodes_1 = nodes_0 + [self.op.node_name, ]
3675 return env, nodes_0, nodes_1
3677 def CheckPrereq(self):
3678 """Check prerequisites.
    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg
3691 dns_data = netutils.GetHostInfo(node_name)
3693 node = dns_data.name
3694 primary_ip = self.op.primary_ip = dns_data.ip
3695 if self.op.secondary_ip is None:
3696 self.op.secondary_ip = primary_ip
3697 if not netutils.IsValidIP4(self.op.secondary_ip):
3698 raise errors.OpPrereqError("Invalid secondary IP given",
3700 secondary_ip = self.op.secondary_ip
3702 node_list = cfg.GetNodeList()
3703 if not self.op.readd and node in node_list:
3704 raise errors.OpPrereqError("Node %s is already in the configuration" %
3705 node, errors.ECODE_EXISTS)
3706 elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)
3710 self.changed_primary_ip = False
3712 for existing_node_name in node_list:
3713 existing_node = cfg.GetNodeInfo(existing_node_name)
3715 if self.op.readd and node == existing_node_name:
3716 if existing_node.secondary_ip != secondary_ip:
3717 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3718 " address configuration as before",
3720 if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True
        continue
3725 if (existing_node.primary_ip == primary_ip or
3726 existing_node.secondary_ip == primary_ip or
3727 existing_node.primary_ip == secondary_ip or
3728 existing_node.secondary_ip == secondary_ip):
3729 raise errors.OpPrereqError("New node ip address(es) conflict with"
3730 " existing node %s" % existing_node.name,
3731 errors.ECODE_NOTUNIQUE)
3733 # check that the type of the node (single versus dual homed) is the
3734 # same as for the master
3735 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3736 master_singlehomed = myself.secondary_ip == myself.primary_ip
3737 newbie_singlehomed = secondary_ip == primary_ip
3738 if master_singlehomed != newbie_singlehomed:
3739 if master_singlehomed:
3740 raise errors.OpPrereqError("The master has no private ip but the"
3741 " new node has one",
3744 raise errors.OpPrereqError("The master has a private ip but the"
3745 " new node doesn't have one",
3748 # checks reachability
3749 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3750 raise errors.OpPrereqError("Node not reachable by ping",
3751 errors.ECODE_ENVIRON)
3753 if not newbie_singlehomed:
3754 # check reachability from my secondary ip to newbie's secondary ip
3755 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3756 source=myself.secondary_ip):
3757 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3758 " based ping to noded port",
3759 errors.ECODE_ENVIRON)
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
3773 primary_ip=primary_ip,
3774 secondary_ip=secondary_ip,
3775 master_candidate=self.master_candidate,
3776 offline=False, drained=False)
3778 def Exec(self, feedback_fn):
3779 """Adds the new node to the cluster.
3782 new_node = self.new_node
3783 node = new_node.name
3785 # for re-adds, reset the offline/drained/master-candidate flags;
3786 # we need to reset here, otherwise offline would prevent RPC calls
3787 # later in the procedure; this also means that if the re-add
3788 # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3791 self.LogInfo("Readding a node, the offline/drained flags were reset")
3792 # if we demote the node, we do cleanup later in the procedure
3793 new_node.master_candidate = self.master_candidate
3794 if self.changed_primary_ip:
3795 new_node.primary_ip = self.op.primary_ip
3797 # notify the user about any possible mc promotion
3798 if new_node.master_candidate:
3799 self.LogInfo("Node will be a master candidate")
3801 # check connectivity
3802 result = self.rpc.call_version([node])[node]
3803 result.Raise("Can't get version information from node %s" % node)
3804 if constants.PROTOCOL_VERSION == result.payload:
3805 logging.info("Communication to node %s fine, sw version %s match",
3806 node, result.payload)
3808 raise errors.OpExecError("Version mismatch master version %s,"
3809 " node version %s" %
3810 (constants.PROTOCOL_VERSION, result.payload))
3813 if self.cfg.GetClusterInfo().modify_ssh_setup:
3814 logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
3827 result.Raise("Cannot transfer ssh keys to the new node")
3829 # Add node to our /etc/hosts, and add key to known_hosts
3830 if self.cfg.GetClusterInfo().modify_etc_hosts:
3831 # FIXME: this should be done via an rpc call to node daemon
3832 utils.AddHostToEtcHosts(new_node.name)
3834 if new_node.secondary_ip != new_node.primary_ip:
3835 result = self.rpc.call_node_has_ip_address(new_node.name,
3836 new_node.secondary_ip)
3837 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3838 prereq=True, ecode=errors.ECODE_ENVIRON)
3839 if not result.payload:
3840 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3841 " you gave (%s). Please fix and re-run this"
3842 " command." % new_node.secondary_ip)
3844 node_verify_list = [self.cfg.GetMasterNode()]
3845 node_verify_param = {
3846 constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }

3850 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3851 self.cfg.GetClusterName())
3852 for verifier in node_verify_list:
3853 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3854 nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
3864 self.context.ReaddNode(new_node)
3865 # make sure we redistribute the config
3866 self.cfg.Update(new_node, feedback_fn)
3867 # and make sure the new node will not have old files around
3868 if not new_node.master_candidate:
3869 result = self.rpc.call_node_demote_from_mc(new_node.name)
3870 msg = result.fail_msg
3872 self.LogWarning("Node failed to demote itself from master"
3873 " candidate status: %s" % msg)
3874 else:
3875 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3876 self.context.AddNode(new_node, self.proc.GetECId())
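# Illustrative sketch, not part of the LU above: the two RPC error-handling
# styles used throughout this module, reduced to plain Python. The class
# name is a hypothetical stand-in for the real rpc result objects.
class _ExampleRpcResult(object):
  def __init__(self, fail_msg="", payload=None):
    self.fail_msg = fail_msg  # empty means the remote call succeeded
    self.payload = payload

  def Raise(self, msg):
    # abort-style handling, as in result.Raise("Cannot transfer ssh keys...")
    if self.fail_msg:
      raise RuntimeError("%s: %s" % (msg, self.fail_msg))

# The warn-and-continue style checks fail_msg directly instead, as in the
# demotion above: msg = result.fail_msg; if msg: self.LogWarning(...)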
3879 class LUSetNodeParams(LogicalUnit):
3880 """Modifies the parameters of a node.
3882 """
3883 HPATH = "node-modify"
3884 HTYPE = constants.HTYPE_NODE
3885 _OP_PARAMS = [
3886 _PNodeName,
3887 ("master_candidate", None, _TMaybeBool),
3888 ("offline", None, _TMaybeBool),
3889 ("drained", None, _TMaybeBool),
3890 ("auto_promote", False, _TBool),
3891 ]
3892 REQ_BGL = False
3895 def CheckArguments(self):
3896 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3897 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3898 if all_mods.count(None) == 3:
3899 raise errors.OpPrereqError("Please pass at least one modification",
3900 errors.ECODE_INVAL)
3901 if all_mods.count(True) > 1:
3902 raise errors.OpPrereqError("Can't set the node into more than one"
3903 " state at the same time",
3904 errors.ECODE_INVAL)
3906 # Boolean value that tells us whether we're offlining or draining the node
3907 self.offline_or_drain = (self.op.offline == True or
3908 self.op.drained == True)
3909 self.deoffline_or_drain = (self.op.offline == False or
3910 self.op.drained == False)
3911 self.might_demote = (self.op.master_candidate == False or
3912 self.offline_or_drain)
3914 self.lock_all = self.op.auto_promote and self.might_demote
3917 def ExpandNames(self):
3918 if self.lock_all:
3919 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3920 else:
3921 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3923 def BuildHooksEnv(self):
3924 """Build hooks env.
3926 This runs on the master node.
3928 """
3929 env = {
3930 "OP_TARGET": self.op.node_name,
3931 "MASTER_CANDIDATE": str(self.op.master_candidate),
3932 "OFFLINE": str(self.op.offline),
3933 "DRAINED": str(self.op.drained),
3934 }
3935 nl = [self.cfg.GetMasterNode(),
3936 self.op.node_name]
3937 return env, nl, nl
3939 def CheckPrereq(self):
3940 """Check prerequisites.
3942 This only checks the node name against the existing node list.
3944 """
3945 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3947 if (self.op.master_candidate is not None or
3948 self.op.drained is not None or
3949 self.op.offline is not None):
3950 # we can't change the master's node flags
3951 if self.op.node_name == self.cfg.GetMasterNode():
3952 raise errors.OpPrereqError("The master role can be changed"
3953 " only via masterfailover",
3954 errors.ECODE_INVAL)
3957 if node.master_candidate and self.might_demote and not self.lock_all:
3958 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3959 # check if after removing the current node, we're missing master
3960 # candidates
3961 (mc_remaining, mc_should, _) = \
3962 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3963 if mc_remaining < mc_should:
3964 raise errors.OpPrereqError("Not enough master candidates, please"
3965 " pass auto_promote to allow promotion",
3966 errors.ECODE_INVAL)
3968 if (self.op.master_candidate == True and
3969 ((node.offline and not self.op.offline == False) or
3970 (node.drained and not self.op.drained == False))):
3971 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3972 " to master_candidate" % node.name,
3973 errors.ECODE_INVAL)
3975 # If we're being deofflined/drained, we'll MC ourself if needed
3976 if (self.deoffline_or_drain and not self.offline_or_drain and not
3977 self.op.master_candidate == True and not node.master_candidate):
3978 self.op.master_candidate = _DecideSelfPromotion(self)
3979 if self.op.master_candidate:
3980 self.LogInfo("Autopromoting node to master candidate")
3984 def Exec(self, feedback_fn):
3985 """Modifies a node.
3987 """
3988 node = self.node
3990 result = []
3991 changed_mc = False
3993 if self.op.offline is not None:
3994 node.offline = self.op.offline
3995 result.append(("offline", str(self.op.offline)))
3996 if self.op.offline == True:
3997 if node.master_candidate:
3998 node.master_candidate = False
3999 changed_mc = True
4000 result.append(("master_candidate", "auto-demotion due to offline"))
4001 if node.drained:
4002 node.drained = False
4003 result.append(("drained", "clear drained status due to offline"))
4005 if self.op.master_candidate is not None:
4006 node.master_candidate = self.op.master_candidate
4007 changed_mc = True
4008 result.append(("master_candidate", str(self.op.master_candidate)))
4009 if self.op.master_candidate == False:
4010 rrc = self.rpc.call_node_demote_from_mc(node.name)
4011 msg = rrc.fail_msg
4012 if msg:
4013 self.LogWarning("Node failed to demote itself: %s" % msg)
4015 if self.op.drained is not None:
4016 node.drained = self.op.drained
4017 result.append(("drained", str(self.op.drained)))
4018 if self.op.drained == True:
4019 if node.master_candidate:
4020 node.master_candidate = False
4021 changed_mc = True
4022 result.append(("master_candidate", "auto-demotion due to drain"))
4023 rrc = self.rpc.call_node_demote_from_mc(node.name)
4024 msg = rrc.fail_msg
4025 if msg:
4026 self.LogWarning("Node failed to demote itself: %s" % msg)
4027 if node.offline:
4028 node.offline = False
4029 result.append(("offline", "clear offline status due to drain"))
4031 # we locked all nodes, we adjust the CP before updating this node
4032 if self.lock_all:
4033 _AdjustCandidatePool(self, [node.name])
4035 # this will trigger configuration file update, if needed
4036 self.cfg.Update(node, feedback_fn)
4038 # this will trigger job queue propagation or cleanup
4039 if changed_mc:
4040 self.context.ReaddNode(node)
4042 return result
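# Illustrative sketch, not part of the LU above: the flag interactions that
# Exec enforces, as a pure function. The helper name is hypothetical.
def _ExampleApplyNodeFlags(offline, drained, master_candidate):
  # offlining or draining a node auto-demotes it from master candidacy;
  # setting one of offline/drained clears the other
  if offline:
    master_candidate = False
    drained = False
  if drained:
    master_candidate = False
    offline = False
  return offline, drained, master_candidate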
4045 class LUPowercycleNode(NoHooksLU):
4046 """Powercycles a node.
4048 """
4049 _OP_PARAMS = [
4050 _PNodeName,
4051 ("force", False, _TBool),
4052 ]
4053 REQ_BGL = False
4055 def CheckArguments(self):
4056 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4057 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4058 raise errors.OpPrereqError("The node is the master and the force"
4059 " parameter was not set",
4060 errors.ECODE_INVAL)
4062 def ExpandNames(self):
4063 """Locking for PowercycleNode.
4065 This is a last-resort option and shouldn't block on other
4066 jobs. Therefore, we grab no locks.
4068 """
4069 self.needed_locks = {}
4071 def Exec(self, feedback_fn):
4072 """Reboots a node.
4074 """
4075 result = self.rpc.call_node_powercycle(self.op.node_name,
4076 self.cfg.GetHypervisorType())
4077 result.Raise("Failed to schedule the reboot")
4078 return result.payload
4081 class LUQueryClusterInfo(NoHooksLU):
4082 """Query cluster configuration.
4084 """
4085 REQ_BGL = False
4087 def ExpandNames(self):
4088 self.needed_locks = {}
4090 def Exec(self, feedback_fn):
4091 """Return cluster config.
4093 """
4094 cluster = self.cfg.GetClusterInfo()
4095 os_hvp = {}
4097 # Filter just for enabled hypervisors
4098 for os_name, hv_dict in cluster.os_hvp.items():
4099 os_hvp[os_name] = {}
4100 for hv_name, hv_params in hv_dict.items():
4101 if hv_name in cluster.enabled_hypervisors:
4102 os_hvp[os_name][hv_name] = hv_params
4104 result = {
4105 "software_version": constants.RELEASE_VERSION,
4106 "protocol_version": constants.PROTOCOL_VERSION,
4107 "config_version": constants.CONFIG_VERSION,
4108 "os_api_version": max(constants.OS_API_VERSIONS),
4109 "export_version": constants.EXPORT_VERSION,
4110 "architecture": (platform.architecture()[0], platform.machine()),
4111 "name": cluster.cluster_name,
4112 "master": cluster.master_node,
4113 "default_hypervisor": cluster.enabled_hypervisors[0],
4114 "enabled_hypervisors": cluster.enabled_hypervisors,
4115 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4116 for hypervisor_name in cluster.enabled_hypervisors]),
4117 "os_hvp": os_hvp,
4118 "beparams": cluster.beparams,
4119 "osparams": cluster.osparams,
4120 "nicparams": cluster.nicparams,
4121 "candidate_pool_size": cluster.candidate_pool_size,
4122 "master_netdev": cluster.master_netdev,
4123 "volume_group_name": cluster.volume_group_name,
4124 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4125 "file_storage_dir": cluster.file_storage_dir,
4126 "maintain_node_health": cluster.maintain_node_health,
4127 "ctime": cluster.ctime,
4128 "mtime": cluster.mtime,
4129 "uuid": cluster.uuid,
4130 "tags": list(cluster.GetTags()),
4131 "uid_pool": cluster.uid_pool,
4132 "default_iallocator": cluster.default_iallocator,
4133 }
4135 return result
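# Illustrative sketch, not part of the LU above: the os_hvp filtering done
# in Exec, on plain dicts. The helper name is hypothetical.
def _ExampleFilterOsHvp(os_hvp_all, enabled_hypervisors):
  filtered = {}
  for os_name, hv_dict in os_hvp_all.items():
    # keep only the per-OS parameters of hypervisors the cluster enables
    filtered[os_name] = dict((hv_name, hv_params)
                             for hv_name, hv_params in hv_dict.items()
                             if hv_name in enabled_hypervisors)
  return filtered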
4138 class LUQueryConfigValues(NoHooksLU):
4139 """Return configuration values.
4141 """
4142 _OP_PARAMS = [_POutputFields]
4143 REQ_BGL = False
4144 _FIELDS_DYNAMIC = utils.FieldSet()
4145 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4146 "watcher_pause")
4148 def CheckArguments(self):
4149 _CheckOutputFields(static=self._FIELDS_STATIC,
4150 dynamic=self._FIELDS_DYNAMIC,
4151 selected=self.op.output_fields)
4153 def ExpandNames(self):
4154 self.needed_locks = {}
4156 def Exec(self, feedback_fn):
4157 """Dump a representation of the cluster config to the standard output.
4159 """
4160 values = []
4161 for field in self.op.output_fields:
4162 if field == "cluster_name":
4163 entry = self.cfg.GetClusterName()
4164 elif field == "master_node":
4165 entry = self.cfg.GetMasterNode()
4166 elif field == "drain_flag":
4167 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4168 elif field == "watcher_pause":
4169 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4170 else:
4171 raise errors.ParameterError(field)
4172 values.append(entry)
4174 return values
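# Illustrative sketch, not part of the LU above: the same per-field dispatch
# written with a lookup table; a design alternative, not what the LU uses.
# Table and helper names are hypothetical.
_EXAMPLE_VALUE_GETTERS = {
  "cluster_name": lambda cfg: cfg.GetClusterName(),
  "master_node": lambda cfg: cfg.GetMasterNode(),
}

def _ExampleQueryConfigValues(cfg, fields):
  try:
    return [_EXAMPLE_VALUE_GETTERS[field](cfg) for field in fields]
  except KeyError, err:
    raise errors.ParameterError(str(err))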
4176 class LUActivateInstanceDisks(NoHooksLU):
4177 """Bring up an instance's disks.
4179 """
4180 _OP_PARAMS = [
4181 _PInstanceName,
4182 ("ignore_size", False, _TBool),
4183 ]
4184 REQ_BGL = False
4186 def ExpandNames(self):
4187 self._ExpandAndLockInstance()
4188 self.needed_locks[locking.LEVEL_NODE] = []
4189 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4191 def DeclareLocks(self, level):
4192 if level == locking.LEVEL_NODE:
4193 self._LockInstancesNodes()
4195 def CheckPrereq(self):
4196 """Check prerequisites.
4198 This checks that the instance is in the cluster.
4200 """
4201 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4202 assert self.instance is not None, \
4203 "Cannot retrieve locked instance %s" % self.op.instance_name
4204 _CheckNodeOnline(self, self.instance.primary_node)
4206 def Exec(self, feedback_fn):
4207 """Activate the disks.
4209 """
4210 disks_ok, disks_info = \
4211 _AssembleInstanceDisks(self, self.instance,
4212 ignore_size=self.op.ignore_size)
4213 if not disks_ok:
4214 raise errors.OpExecError("Cannot activate block devices")
4216 return disks_info
4219 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4220 ignore_size=False):
4221 """Prepare the block devices for an instance.
4223 This sets up the block devices on all nodes.
4225 @type lu: L{LogicalUnit}
4226 @param lu: the logical unit on whose behalf we execute
4227 @type instance: L{objects.Instance}
4228 @param instance: the instance for whose disks we assemble
4229 @type disks: list of L{objects.Disk} or None
4230 @param disks: which disks to assemble (or all, if None)
4231 @type ignore_secondaries: boolean
4232 @param ignore_secondaries: if true, errors on secondary nodes
4233 won't result in an error return from the function
4234 @type ignore_size: boolean
4235 @param ignore_size: if true, the current known size of the disk
4236 will not be used during the disk activation, useful for cases
4237 when the size is wrong
4238 @return: False if the operation failed, otherwise a list of
4239 (host, instance_visible_name, node_visible_name)
4240 with the mapping from node devices to instance devices
4242 """
4243 device_info = []
4244 disks_ok = True
4245 iname = instance.name
4246 disks = _ExpandCheckDisks(instance, disks)
4248 # With the two passes mechanism we try to reduce the window of
4249 # opportunity for the race condition of switching DRBD to primary
4250 # before handshaking occurred, but we do not eliminate it
4252 # The proper fix would be to wait (with some limits) until the
4253 # connection has been made and drbd transitions from WFConnection
4254 # into any other network-connected state (Connected, SyncTarget,
4255 # SyncSource, etc.)
4257 # 1st pass, assemble on all nodes in secondary mode
4258 for inst_disk in disks:
4259 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4260 if ignore_size:
4261 node_disk = node_disk.Copy()
4262 node_disk.UnsetSize()
4263 lu.cfg.SetDiskID(node_disk, node)
4264 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4265 msg = result.fail_msg
4266 if msg:
4267 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4268 " (is_primary=False, pass=1): %s",
4269 inst_disk.iv_name, node, msg)
4270 if not ignore_secondaries:
4271 disks_ok = False
4273 # FIXME: race condition on drbd migration to primary
4275 # 2nd pass, do only the primary node
4276 for inst_disk in disks:
4277 dev_path = None
4279 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4280 if node != instance.primary_node:
4281 continue
4282 if ignore_size:
4283 node_disk = node_disk.Copy()
4284 node_disk.UnsetSize()
4285 lu.cfg.SetDiskID(node_disk, node)
4286 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4287 msg = result.fail_msg
4288 if msg:
4289 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4290 " (is_primary=True, pass=2): %s",
4291 inst_disk.iv_name, node, msg)
4292 disks_ok = False
4293 else:
4294 dev_path = result.payload
4296 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4298 # leave the disks configured for the primary node
4299 # this is a workaround that would be fixed better by
4300 # improving the logical/physical id handling
4301 for disk in disks:
4302 lu.cfg.SetDiskID(disk, instance.primary_node)
4304 return disks_ok, device_info
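# Illustrative sketch, not part of the function above: the two-pass shape of
# the assembly, on plain data. Every disk is first assembled on all of its
# nodes in secondary mode, and only afterwards promoted on the primary,
# which narrows (but does not close) the WFConnection race described above.
# Names are hypothetical; each disk here is a dict with a "nodes" list.
def _ExampleTwoPassAssemble(disks, primary_node, assemble_fn):
  for disk in disks:  # 1st pass: all nodes, is_primary=False
    for node in disk["nodes"]:
      assemble_fn(node, disk, False)
  for disk in disks:  # 2nd pass: the primary node only, is_primary=True
    assemble_fn(primary_node, disk, True)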
4307 def _StartInstanceDisks(lu, instance, force):
4308 """Start the disks of an instance.
4310 """
4311 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4312 ignore_secondaries=force)
4313 if not disks_ok:
4314 _ShutdownInstanceDisks(lu, instance)
4315 if force is not None and not force:
4316 lu.proc.LogWarning("", hint="If the message above refers to a"
4317 " secondary node,"
4318 " you can retry the operation using '--force'.")
4319 raise errors.OpExecError("Disk consistency error")
4322 class LUDeactivateInstanceDisks(NoHooksLU):
4323 """Shutdown an instance's disks.
4325 """
4326 _OP_PARAMS = [
4327 _PInstanceName,
4328 ]
4329 REQ_BGL = False
4331 def ExpandNames(self):
4332 self._ExpandAndLockInstance()
4333 self.needed_locks[locking.LEVEL_NODE] = []
4334 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4336 def DeclareLocks(self, level):
4337 if level == locking.LEVEL_NODE:
4338 self._LockInstancesNodes()
4340 def CheckPrereq(self):
4341 """Check prerequisites.
4343 This checks that the instance is in the cluster.
4345 """
4346 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4347 assert self.instance is not None, \
4348 "Cannot retrieve locked instance %s" % self.op.instance_name
4350 def Exec(self, feedback_fn):
4351 """Deactivate the disks.
4353 """
4354 instance = self.instance
4355 _SafeShutdownInstanceDisks(self, instance)
4358 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4359 """Shutdown block devices of an instance.
4361 This function checks if an instance is running, before calling
4362 _ShutdownInstanceDisks.
4364 """
4365 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4366 _ShutdownInstanceDisks(lu, instance, disks=disks)
4369 def _ExpandCheckDisks(instance, disks):
4370 """Return the instance disks selected by the disks list.
4372 @type disks: list of L{objects.Disk} or None
4373 @param disks: selected disks
4374 @rtype: list of L{objects.Disk}
4375 @return: selected instance disks to act on
4377 """
4378 if disks is None:
4379 return instance.disks
4380 else:
4381 if not set(disks).issubset(instance.disks):
4382 raise errors.ProgrammerError("Can only act on disks belonging to the"
4383 " target instance")
4384 return disks
4387 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4388 """Shutdown block devices of an instance.
4390 This does the shutdown on all nodes of the instance.
4392 If the ignore_primary is false, errors on the primary node are
4393 ignored.
4395 """
4396 all_result = True
4397 disks = _ExpandCheckDisks(instance, disks)
4399 for disk in disks:
4400 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4401 lu.cfg.SetDiskID(top_disk, node)
4402 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4403 msg = result.fail_msg
4404 if msg:
4405 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4406 disk.iv_name, node, msg)
4407 if not ignore_primary or node != instance.primary_node:
4408 all_result = False
4410 return all_result
4412 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4413 """Checks if a node has enough free memory.
4415 This function checks if a given node has the needed amount of free
4416 memory. In case the node has less memory or we cannot get the
4417 information from the node, this function raises an OpPrereqError
4418 exception.
4420 @type lu: C{LogicalUnit}
4421 @param lu: a logical unit from which we get configuration data
4422 @type node: C{str}
4423 @param node: the node to check
4424 @type reason: C{str}
4425 @param reason: string to use in the error message
4426 @type requested: C{int}
4427 @param requested: the amount of memory in MiB to check for
4428 @type hypervisor_name: C{str}
4429 @param hypervisor_name: the hypervisor to ask for memory stats
4430 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4431 we cannot check the node
4433 """
4434 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4435 nodeinfo[node].Raise("Can't get data from node %s" % node,
4436 prereq=True, ecode=errors.ECODE_ENVIRON)
4437 free_mem = nodeinfo[node].payload.get('memory_free', None)
4438 if not isinstance(free_mem, int):
4439 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4440 " was '%s'" % (node, free_mem),
4441 errors.ECODE_ENVIRON)
4442 if requested > free_mem:
4443 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4444 " needed %s MiB, available %s MiB" %
4445 (node, reason, requested, free_mem),
4446 errors.ECODE_NORES)
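# Illustrative sketch, not part of the function above: the payload check in
# isolation. A node_info payload without a usable "memory_free" integer is
# treated as an error, never as zero. The helper name is hypothetical.
def _ExampleHasFreeMemory(payload, requested_mib):
  free_mem = payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise ValueError("can't compute free memory, result was %r" % (free_mem,))
  return requested_mib <= free_mem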
4449 def _CheckNodesFreeDisk(lu, nodenames, requested):
4450 """Checks if nodes have enough free disk space in the default VG.
4452 This function checks if all given nodes have the needed amount of
4453 free disk. In case any node has less disk or we cannot get the
4454 information from the node, this function raises an OpPrereqError
4455 exception.
4457 @type lu: C{LogicalUnit}
4458 @param lu: a logical unit from which we get configuration data
4459 @type nodenames: C{list}
4460 @param nodenames: the list of node names to check
4461 @type requested: C{int}
4462 @param requested: the amount of disk in MiB to check for
4463 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4464 we cannot check the node
4466 """
4467 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4468 lu.cfg.GetHypervisorType())
4469 for node in nodenames:
4470 info = nodeinfo[node]
4471 info.Raise("Cannot get current information from node %s" % node,
4472 prereq=True, ecode=errors.ECODE_ENVIRON)
4473 vg_free = info.payload.get("vg_free", None)
4474 if not isinstance(vg_free, int):
4475 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4476 " result was '%s'" % (node, vg_free),
4477 errors.ECODE_ENVIRON)
4478 if requested > vg_free:
4479 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4480 " required %d MiB, available %d MiB" %
4481 (node, requested, vg_free),
4482 errors.ECODE_NORES)
4485 class LUStartupInstance(LogicalUnit):
4486 """Starts an instance.
4488 """
4489 HPATH = "instance-start"
4490 HTYPE = constants.HTYPE_INSTANCE
4491 _OP_PARAMS = [
4492 _PInstanceName,
4493 _PForce,
4494 ("hvparams", _EmptyDict, _TDict),
4495 ("beparams", _EmptyDict, _TDict),
4496 ]
4497 REQ_BGL = False
4499 def CheckArguments(self):
4500 # extra beparams
4501 if self.op.beparams:
4502 # fill the beparams dict
4503 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4505 def ExpandNames(self):
4506 self._ExpandAndLockInstance()
4508 def BuildHooksEnv(self):
4509 """Build hooks env.
4511 This runs on master, primary and secondary nodes of the instance.
4513 """
4514 env = {
4515 "FORCE": self.op.force,
4516 }
4517 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4518 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4519 return env, nl, nl
4521 def CheckPrereq(self):
4522 """Check prerequisites.
4524 This checks that the instance is in the cluster.
4526 """
4527 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4528 assert self.instance is not None, \
4529 "Cannot retrieve locked instance %s" % self.op.instance_name
4531 # extra hvparams
4532 if self.op.hvparams:
4533 # check hypervisor parameter syntax (locally)
4534 cluster = self.cfg.GetClusterInfo()
4535 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4536 filled_hvp = cluster.FillHV(instance)
4537 filled_hvp.update(self.op.hvparams)
4538 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4539 hv_type.CheckParameterSyntax(filled_hvp)
4540 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4542 _CheckNodeOnline(self, instance.primary_node)
4544 bep = self.cfg.GetClusterInfo().FillBE(instance)
4545 # check bridges existence
4546 _CheckInstanceBridgesExist(self, instance)
4548 remote_info = self.rpc.call_instance_info(instance.primary_node,
4549 instance.name,
4550 instance.hypervisor)
4551 remote_info.Raise("Error checking node %s" % instance.primary_node,
4552 prereq=True, ecode=errors.ECODE_ENVIRON)
4553 if not remote_info.payload: # not running already
4554 _CheckNodeFreeMemory(self, instance.primary_node,
4555 "starting instance %s" % instance.name,
4556 bep[constants.BE_MEMORY], instance.hypervisor)
4558 def Exec(self, feedback_fn):
4559 """Start the instance.
4561 """
4562 instance = self.instance
4563 force = self.op.force
4565 self.cfg.MarkInstanceUp(instance.name)
4567 node_current = instance.primary_node
4569 _StartInstanceDisks(self, instance, force)
4571 result = self.rpc.call_instance_start(node_current, instance,
4572 self.op.hvparams, self.op.beparams)
4573 msg = result.fail_msg
4574 if msg:
4575 _ShutdownInstanceDisks(self, instance)
4576 raise errors.OpExecError("Could not start instance: %s" % msg)
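# Illustrative sketch, not part of the LU above: the ordering Exec relies
# on, with the fail_msg check rewritten in exception style. A failed start
# shuts the just-activated disks down again, so no half-started state is
# left behind. The callables are hypothetical stand-ins.
def _ExampleStartSequence(start_disks_fn, start_instance_fn, stop_disks_fn):
  start_disks_fn()
  try:
    start_instance_fn()
  except Exception:
    stop_disks_fn()  # roll back disk activation before propagating
    raise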
4579 class LURebootInstance(LogicalUnit):
4580 """Reboot an instance.
4582 """
4583 HPATH = "instance-reboot"
4584 HTYPE = constants.HTYPE_INSTANCE
4585 _OP_PARAMS = [
4586 _PInstanceName,
4587 ("ignore_secondaries", False, _TBool),
4588 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4589 _PShutdownTimeout,
4590 ]
4591 REQ_BGL = False
4593 def ExpandNames(self):
4594 self._ExpandAndLockInstance()
4596 def BuildHooksEnv(self):
4597 """Build hooks env.
4599 This runs on master, primary and secondary nodes of the instance.
4601 """
4602 env = {
4603 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4604 "REBOOT_TYPE": self.op.reboot_type,
4605 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4606 }
4607 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4608 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4609 return env, nl, nl
4611 def CheckPrereq(self):
4612 """Check prerequisites.
4614 This checks that the instance is in the cluster.
4616 """
4617 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4618 assert self.instance is not None, \
4619 "Cannot retrieve locked instance %s" % self.op.instance_name
4621 _CheckNodeOnline(self, instance.primary_node)
4623 # check bridges existence
4624 _CheckInstanceBridgesExist(self, instance)
4626 def Exec(self, feedback_fn):
4627 """Reboot the instance.
4629 """
4630 instance = self.instance
4631 ignore_secondaries = self.op.ignore_secondaries
4632 reboot_type = self.op.reboot_type
4634 node_current = instance.primary_node
4636 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4637 constants.INSTANCE_REBOOT_HARD]:
4638 for disk in instance.disks:
4639 self.cfg.SetDiskID(disk, node_current)
4640 result = self.rpc.call_instance_reboot(node_current, instance,
4641 reboot_type,
4642 self.op.shutdown_timeout)
4643 result.Raise("Could not reboot instance")
4644 else:
4645 result = self.rpc.call_instance_shutdown(node_current, instance,
4646 self.op.shutdown_timeout)
4647 result.Raise("Could not shutdown instance for full reboot")
4648 _ShutdownInstanceDisks(self, instance)
4649 _StartInstanceDisks(self, instance, ignore_secondaries)
4650 result = self.rpc.call_instance_start(node_current, instance, None, None)
4651 msg = result.fail_msg
4652 if msg:
4653 _ShutdownInstanceDisks(self, instance)
4654 raise errors.OpExecError("Could not start instance for"
4655 " full reboot: %s" % msg)
4657 self.cfg.MarkInstanceUp(instance.name)
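# Illustrative sketch, not part of the LU above: the reboot-type dispatch in
# Exec. Soft/hard reboots are delegated to the node daemon in one RPC, while
# a full reboot is emulated from the master as shutdown, disk cycle and
# start. The callables are hypothetical stand-ins.
def _ExampleRebootDispatch(reboot_type, node_reboot_fn, full_reboot_fn):
  if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                     constants.INSTANCE_REBOOT_HARD]:
    node_reboot_fn()
  else:
    full_reboot_fn()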
4660 class LUShutdownInstance(LogicalUnit):
4661 """Shutdown an instance.
4663 """
4664 HPATH = "instance-stop"
4665 HTYPE = constants.HTYPE_INSTANCE
4666 _OP_PARAMS = [
4667 _PInstanceName,
4668 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4669 ]
4670 REQ_BGL = False
4672 def ExpandNames(self):
4673 self._ExpandAndLockInstance()
4675 def BuildHooksEnv(self):
4676 """Build hooks env.
4678 This runs on master, primary and secondary nodes of the instance.
4680 """
4681 env = _BuildInstanceHookEnvByObject(self, self.instance)
4682 env["TIMEOUT"] = self.op.timeout
4683 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4684 return env, nl, nl
4686 def CheckPrereq(self):
4687 """Check prerequisites.
4689 This checks that the instance is in the cluster.
4691 """
4692 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4693 assert self.instance is not None, \
4694 "Cannot retrieve locked instance %s" % self.op.instance_name
4695 _CheckNodeOnline(self, self.instance.primary_node)
4697 def Exec(self, feedback_fn):
4698 """Shutdown the instance.
4700 """
4701 instance = self.instance
4702 node_current = instance.primary_node
4703 timeout = self.op.timeout
4704 self.cfg.MarkInstanceDown(instance.name)
4705 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4706 msg = result.fail_msg
4707 if msg:
4708 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4710 _ShutdownInstanceDisks(self, instance)
4713 class LUReinstallInstance(LogicalUnit):
4714 """Reinstall an instance.
4716 """
4717 HPATH = "instance-reinstall"
4718 HTYPE = constants.HTYPE_INSTANCE
4719 _OP_PARAMS = [
4720 _PInstanceName,
4721 ("os_type", None, _TMaybeString),
4722 ("force_variant", False, _TBool),
4723 ]
4724 REQ_BGL = False
4726 def ExpandNames(self):
4727 self._ExpandAndLockInstance()
4729 def BuildHooksEnv(self):
4730 """Build hooks env.
4732 This runs on master, primary and secondary nodes of the instance.
4734 """
4735 env = _BuildInstanceHookEnvByObject(self, self.instance)
4736 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4737 return env, nl, nl
4739 def CheckPrereq(self):
4740 """Check prerequisites.
4742 This checks that the instance is in the cluster and is not running.
4744 """
4745 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4746 assert instance is not None, \
4747 "Cannot retrieve locked instance %s" % self.op.instance_name
4748 _CheckNodeOnline(self, instance.primary_node)
4750 if instance.disk_template == constants.DT_DISKLESS:
4751 raise errors.OpPrereqError("Instance '%s' has no disks" %
4752 self.op.instance_name,
4753 errors.ECODE_INVAL)
4754 _CheckInstanceDown(self, instance, "cannot reinstall")
4756 if self.op.os_type is not None:
4757 # OS verification
4758 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4759 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4761 self.instance = instance
4763 def Exec(self, feedback_fn):
4764 """Reinstall the instance.
4766 """
4767 inst = self.instance
4769 if self.op.os_type is not None:
4770 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4771 inst.os = self.op.os_type
4772 self.cfg.Update(inst, feedback_fn)
4774 _StartInstanceDisks(self, inst, None)
4775 try:
4776 feedback_fn("Running the instance OS create scripts...")
4777 # FIXME: pass debug option from opcode to backend
4778 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4779 self.op.debug_level)
4780 result.Raise("Could not install OS for instance %s on node %s" %
4781 (inst.name, inst.primary_node))
4782 finally:
4783 _ShutdownInstanceDisks(self, inst)
4786 class LURecreateInstanceDisks(LogicalUnit):
4787 """Recreate an instance's missing disks.
4789 """
4790 HPATH = "instance-recreate-disks"
4791 HTYPE = constants.HTYPE_INSTANCE
4792 _OP_PARAMS = [
4793 _PInstanceName,
4794 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4795 ]
4796 REQ_BGL = False
4798 def ExpandNames(self):
4799 self._ExpandAndLockInstance()
4801 def BuildHooksEnv(self):
4802 """Build hooks env.
4804 This runs on master, primary and secondary nodes of the instance.
4806 """
4807 env = _BuildInstanceHookEnvByObject(self, self.instance)
4808 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4809 return env, nl, nl
4811 def CheckPrereq(self):
4812 """Check prerequisites.
4814 This checks that the instance is in the cluster and is not running.
4816 """
4817 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4818 assert instance is not None, \
4819 "Cannot retrieve locked instance %s" % self.op.instance_name
4820 _CheckNodeOnline(self, instance.primary_node)
4822 if instance.disk_template == constants.DT_DISKLESS:
4823 raise errors.OpPrereqError("Instance '%s' has no disks" %
4824 self.op.instance_name, errors.ECODE_INVAL)
4825 _CheckInstanceDown(self, instance, "cannot recreate disks")
4827 if not self.op.disks:
4828 self.op.disks = range(len(instance.disks))
4830 for idx in self.op.disks:
4831 if idx >= len(instance.disks):
4832 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4833 errors.ECODE_INVAL)
4835 self.instance = instance
4837 def Exec(self, feedback_fn):
4838 """Recreate the disks.
4840 """
4841 to_skip = []
4842 for idx, _ in enumerate(self.instance.disks):
4843 if idx not in self.op.disks: # disk idx has not been passed in
4844 to_skip.append(idx)
4847 _CreateDisks(self, self.instance, to_skip=to_skip)
4850 class LURenameInstance(LogicalUnit):
4851 """Rename an instance.
4853 """
4854 HPATH = "instance-rename"
4855 HTYPE = constants.HTYPE_INSTANCE
4856 _OP_PARAMS = [
4857 _PInstanceName,
4858 ("new_name", _NoDefault, _TNonEmptyString),
4859 ("ignore_ip", False, _TBool),
4860 ("check_name", True, _TBool),
4861 ]
4863 def BuildHooksEnv(self):
4864 """Build hooks env.
4866 This runs on master, primary and secondary nodes of the instance.
4868 """
4869 env = _BuildInstanceHookEnvByObject(self, self.instance)
4870 env["INSTANCE_NEW_NAME"] = self.op.new_name
4871 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4872 return env, nl, nl
4874 def CheckPrereq(self):
4875 """Check prerequisites.
4877 This checks that the instance is in the cluster and is not running.
4879 """
4880 self.op.instance_name = _ExpandInstanceName(self.cfg,
4881 self.op.instance_name)
4882 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4883 assert instance is not None
4884 _CheckNodeOnline(self, instance.primary_node)
4885 _CheckInstanceDown(self, instance, "cannot rename")
4886 self.instance = instance
4888 # new name verification
4889 if self.op.check_name:
4890 name_info = netutils.GetHostInfo(self.op.new_name)
4891 self.op.new_name = name_info.name
4893 new_name = self.op.new_name
4895 instance_list = self.cfg.GetInstanceList()
4896 if new_name in instance_list:
4897 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4898 new_name, errors.ECODE_EXISTS)
4900 if not self.op.ignore_ip:
4901 if netutils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4902 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4903 (name_info.ip, new_name),
4904 errors.ECODE_NOTUNIQUE)
4906 def Exec(self, feedback_fn):
4907 """Rename the instance.
4909 """
4910 inst = self.instance
4911 old_name = inst.name
4913 if inst.disk_template == constants.DT_FILE:
4914 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4916 self.cfg.RenameInstance(inst.name, self.op.new_name)
4917 # Change the instance lock. This is definitely safe while we hold the BGL
4918 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4919 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4921 # re-read the instance from the configuration after rename
4922 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4924 if inst.disk_template == constants.DT_FILE:
4925 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4926 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4927 old_file_storage_dir,
4928 new_file_storage_dir)
4929 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4930 " (but the instance has been renamed in Ganeti)" %
4931 (inst.primary_node, old_file_storage_dir,
4932 new_file_storage_dir))
4934 _StartInstanceDisks(self, inst, None)
4935 try:
4936 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4937 old_name, self.op.debug_level)
4938 msg = result.fail_msg
4939 if msg:
4940 msg = ("Could not run OS rename script for instance %s on node %s"
4941 " (but the instance has been renamed in Ganeti): %s" %
4942 (inst.name, inst.primary_node, msg))
4943 self.proc.LogWarning(msg)
4944 finally:
4945 _ShutdownInstanceDisks(self, inst)
4948 class LURemoveInstance(LogicalUnit):
4949 """Remove an instance.
4951 """
4952 HPATH = "instance-remove"
4953 HTYPE = constants.HTYPE_INSTANCE
4954 _OP_PARAMS = [
4955 _PInstanceName,
4956 ("ignore_failures", False, _TBool),
4957 _PShutdownTimeout,
4958 ]
4959 REQ_BGL = False
4961 def ExpandNames(self):
4962 self._ExpandAndLockInstance()
4963 self.needed_locks[locking.LEVEL_NODE] = []
4964 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4966 def DeclareLocks(self, level):
4967 if level == locking.LEVEL_NODE:
4968 self._LockInstancesNodes()
4970 def BuildHooksEnv(self):
4971 """Build hooks env.
4973 This runs on master, primary and secondary nodes of the instance.
4975 """
4976 env = _BuildInstanceHookEnvByObject(self, self.instance)
4977 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4978 nl = [self.cfg.GetMasterNode()]
4979 nl_post = list(self.instance.all_nodes) + nl
4980 return env, nl, nl_post
4982 def CheckPrereq(self):
4983 """Check prerequisites.
4985 This checks that the instance is in the cluster.
4987 """
4988 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4989 assert self.instance is not None, \
4990 "Cannot retrieve locked instance %s" % self.op.instance_name
4992 def Exec(self, feedback_fn):
4993 """Remove the instance.
4995 """
4996 instance = self.instance
4997 logging.info("Shutting down instance %s on node %s",
4998 instance.name, instance.primary_node)
5000 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5001 self.op.shutdown_timeout)
5002 msg = result.fail_msg
5003 if msg:
5004 if self.op.ignore_failures:
5005 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5006 else:
5007 raise errors.OpExecError("Could not shutdown instance %s on"
5008 " node %s: %s" %
5009 (instance.name, instance.primary_node, msg))
5011 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5014 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5015 """Utility function to remove an instance.
5017 """
5018 logging.info("Removing block devices for instance %s", instance.name)
5020 if not _RemoveDisks(lu, instance):
5021 if not ignore_failures:
5022 raise errors.OpExecError("Can't remove instance's disks")
5023 feedback_fn("Warning: can't remove instance's disks")
5025 logging.info("Removing instance %s out of cluster config", instance.name)
5027 lu.cfg.RemoveInstance(instance.name)
5029 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5030 "Instance lock removal conflict"
5032 # Remove lock for the instance
5033 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
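# Illustrative sketch, not part of the function above: the removal ordering,
# as data for reference only. Disks go before the configuration entry, so a
# disk failure (with ignore_failures unset) leaves the instance in the
# config where the removal can be retried; the lock is scheduled last.
_EXAMPLE_REMOVAL_ORDER = [
  "shutdown the instance on its primary node",
  "remove the instance's block devices",
  "remove the instance from the configuration",
  "schedule removal of the instance lock",
]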
5036 class LUQueryInstances(NoHooksLU):
5037 """Logical unit for querying instances.
5039 """
5040 # pylint: disable-msg=W0142
5041 _OP_PARAMS = [
5042 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5043 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5044 ("use_locking", False, _TBool),
5045 ]
5046 REQ_BGL = False
5047 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5048 "serial_no", "ctime", "mtime", "uuid"]
5049 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5050 "admin_state",
5051 "disk_template", "ip", "mac", "bridge",
5052 "nic_mode", "nic_link",
5053 "sda_size", "sdb_size", "vcpus", "tags",
5054 "network_port", "beparams",
5055 r"(disk)\.(size)/([0-9]+)",
5056 r"(disk)\.(sizes)", "disk_usage",
5057 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5058 r"(nic)\.(bridge)/([0-9]+)",
5059 r"(nic)\.(macs|ips|modes|links|bridges)",
5060 r"(disk|nic)\.(count)",
5061 "hvparams",
5062 ] + _SIMPLE_FIELDS +
5063 ["hv/%s" % name
5064 for name in constants.HVS_PARAMETERS
5065 if name not in constants.HVC_GLOBALS] +
5066 ["be/%s" % name
5067 for name in constants.BES_PARAMETERS])
5068 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5069 "oper_ram",
5070 "oper_vcpus",
5071 "status")
5074 def CheckArguments(self):
5075 _CheckOutputFields(static=self._FIELDS_STATIC,
5076 dynamic=self._FIELDS_DYNAMIC,
5077 selected=self.op.output_fields)
5079 def ExpandNames(self):
5080 self.needed_locks = {}
5081 self.share_locks[locking.LEVEL_INSTANCE] = 1
5082 self.share_locks[locking.LEVEL_NODE] = 1
5084 if self.op.names:
5085 self.wanted = _GetWantedInstances(self, self.op.names)
5086 else:
5087 self.wanted = locking.ALL_SET
5089 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5090 self.do_locking = self.do_node_query and self.op.use_locking
5091 if self.do_locking:
5092 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5093 self.needed_locks[locking.LEVEL_NODE] = []
5094 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5096 def DeclareLocks(self, level):
5097 if level == locking.LEVEL_NODE and self.do_locking:
5098 self._LockInstancesNodes()
5100 def Exec(self, feedback_fn):
5101 """Computes the list of instances and their attributes.
5103 """
5104 # pylint: disable-msg=R0912
5105 # way too many branches here
5106 all_info = self.cfg.GetAllInstancesInfo()
5107 if self.wanted == locking.ALL_SET:
5108 # caller didn't specify instance names, so ordering is not important
5109 if self.do_locking:
5110 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5111 else:
5112 instance_names = all_info.keys()
5113 instance_names = utils.NiceSort(instance_names)
5114 else:
5115 # caller did specify names, so we must keep the ordering
5116 if self.do_locking:
5117 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5118 else:
5119 tgt_set = all_info.keys()
5120 missing = set(self.wanted).difference(tgt_set)
5121 if missing:
5122 raise errors.OpExecError("Some instances were removed before"
5123 " retrieving their data: %s" % missing)
5124 instance_names = self.wanted
5126 instance_list = [all_info[iname] for iname in instance_names]
5128 # begin data gathering
5130 nodes = frozenset([inst.primary_node for inst in instance_list])
5131 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5133 bad_nodes = []
5134 off_nodes = []
5135 if self.do_node_query:
5136 live_data = {}
5137 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5138 for name in nodes:
5139 result = node_data[name]
5140 if result.offline:
5141 # offline nodes will be in both lists
5142 off_nodes.append(name)
5143 if result.fail_msg:
5144 bad_nodes.append(name)
5145 else:
5146 if result.payload:
5147 live_data.update(result.payload)
5148 # else no instance is alive
5149 else:
5150 live_data = dict([(name, {}) for name in instance_names])
5152 # end data gathering
5154 HVPREFIX = "hv/"
5155 BEPREFIX = "be/"
5156 output = []
5157 cluster = self.cfg.GetClusterInfo()
5158 for instance in instance_list:
5159 iout = []
5160 i_hv = cluster.FillHV(instance, skip_globals=True)
5161 i_be = cluster.FillBE(instance)
5162 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5163 for field in self.op.output_fields:
5164 st_match = self._FIELDS_STATIC.Matches(field)
5165 if field in self._SIMPLE_FIELDS:
5166 val = getattr(instance, field)
5167 elif field == "pnode":
5168 val = instance.primary_node
5169 elif field == "snodes":
5170 val = list(instance.secondary_nodes)
5171 elif field == "admin_state":
5172 val = instance.admin_up
5173 elif field == "oper_state":
5174 if instance.primary_node in bad_nodes:
5175 val = None
5176 else:
5177 val = bool(live_data.get(instance.name))
5178 elif field == "status":
5179 if instance.primary_node in off_nodes:
5180 val = "ERROR_nodeoffline"
5181 elif instance.primary_node in bad_nodes:
5182 val = "ERROR_nodedown"
5183 else:
5184 running = bool(live_data.get(instance.name))
5185 if running:
5186 if instance.admin_up:
5187 val = "running"
5188 else:
5189 val = "ERROR_up"
5190 else:
5191 if instance.admin_up:
5192 val = "ERROR_down"
5193 else:
5194 val = "ADMIN_down"
5195 elif field == "oper_ram":
5196 if instance.primary_node in bad_nodes:
5197 val = None
5198 elif instance.name in live_data:
5199 val = live_data[instance.name].get("memory", "?")
5200 else:
5201 val = "-"
5202 elif field == "oper_vcpus":
5203 if instance.primary_node in bad_nodes:
5204 val = None
5205 elif instance.name in live_data:
5206 val = live_data[instance.name].get("vcpus", "?")
5207 else:
5208 val = "-"
5209 elif field == "vcpus":
5210 val = i_be[constants.BE_VCPUS]
5211 elif field == "disk_template":
5212 val = instance.disk_template
5213 elif field == "ip":
5214 if instance.nics:
5215 val = instance.nics[0].ip
5216 else:
5217 val = None
5218 elif field == "nic_mode":
5219 if i_nicp:
5220 val = i_nicp[0][constants.NIC_MODE]
5221 else:
5222 val = None
5223 elif field == "nic_link":
5224 if i_nicp:
5225 val = i_nicp[0][constants.NIC_LINK]
5226 else:
5227 val = None
5228 elif field == "bridge":
5229 if (instance.nics and
5230 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5231 val = i_nicp[0][constants.NIC_LINK]
5232 else:
5233 val = None
5234 elif field == "mac":
5235 if instance.nics:
5236 val = instance.nics[0].mac
5237 else:
5238 val = None
5239 elif field == "sda_size" or field == "sdb_size":
5240 idx = ord(field[2]) - ord('a')
5241 try:
5242 val = instance.FindDisk(idx).size
5243 except errors.OpPrereqError:
5244 val = None
5245 elif field == "disk_usage": # total disk usage per node
5246 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5247 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5248 elif field == "tags":
5249 val = list(instance.GetTags())
5250 elif field == "hvparams":
5251 val = i_hv
5252 elif (field.startswith(HVPREFIX) and
5253 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5254 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5255 val = i_hv.get(field[len(HVPREFIX):], None)
5256 elif field == "beparams":
5257 val = i_be
5258 elif (field.startswith(BEPREFIX) and
5259 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5260 val = i_be.get(field[len(BEPREFIX):], None)
5261 elif st_match and st_match.groups():
5262 # matches a variable list
5263 st_groups = st_match.groups()
5264 if st_groups and st_groups[0] == "disk":
5265 if st_groups[1] == "count":
5266 val = len(instance.disks)
5267 elif st_groups[1] == "sizes":
5268 val = [disk.size for disk in instance.disks]
5269 elif st_groups[1] == "size":
5270 try:
5271 val = instance.FindDisk(st_groups[2]).size
5272 except errors.OpPrereqError:
5273 val = None
5274 else:
5275 assert False, "Unhandled disk parameter"
5276 elif st_groups[0] == "nic":
5277 if st_groups[1] == "count":
5278 val = len(instance.nics)
5279 elif st_groups[1] == "macs":
5280 val = [nic.mac for nic in instance.nics]
5281 elif st_groups[1] == "ips":
5282 val = [nic.ip for nic in instance.nics]
5283 elif st_groups[1] == "modes":
5284 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5285 elif st_groups[1] == "links":
5286 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5287 elif st_groups[1] == "bridges":
5288 val = []
5289 for nicp in i_nicp:
5290 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5291 val.append(nicp[constants.NIC_LINK])
5292 else:
5293 val.append(None)
5294 else:
5295 # index-based items
5296 nic_idx = int(st_groups[2])
5297 if nic_idx >= len(instance.nics):
5298 val = None
5299 else:
5300 if st_groups[1] == "mac":
5301 val = instance.nics[nic_idx].mac
5302 elif st_groups[1] == "ip":
5303 val = instance.nics[nic_idx].ip
5304 elif st_groups[1] == "mode":
5305 val = i_nicp[nic_idx][constants.NIC_MODE]
5306 elif st_groups[1] == "link":
5307 val = i_nicp[nic_idx][constants.NIC_LINK]
5308 elif st_groups[1] == "bridge":
5309 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5310 if nic_mode == constants.NIC_MODE_BRIDGED:
5311 val = i_nicp[nic_idx][constants.NIC_LINK]
5312 else:
5313 val = None
5314 else:
5315 assert False, "Unhandled NIC parameter"
5316 else:
5317 assert False, ("Declared but unhandled variable parameter '%s'" %
5318 field)
5319 else:
5320 assert False, "Declared but unhandled parameter '%s'" % field
5321 iout.append(val)
5322 output.append(iout)
5324 return output
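# Illustrative sketch, not part of the LU above: how a parametrized field
# such as "disk.size/0" matches. The FieldSet entries are anchored regexes
# whose groups drive the st_groups dispatch above. Hypothetical helper with
# a local import to stay self-contained.
def _ExampleMatchDiskSizeField(field):
  import re
  m = re.match(r"^(disk)\.(size)/([0-9]+)$", field)
  if m:
    return m.groups()  # e.g. ("disk", "size", "0")
  return None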
5327 class LUFailoverInstance(LogicalUnit):
5328 """Failover an instance.
5330 """
5331 HPATH = "instance-failover"
5332 HTYPE = constants.HTYPE_INSTANCE
5333 _OP_PARAMS = [
5334 _PInstanceName,
5335 ("ignore_consistency", False, _TBool),
5336 _PShutdownTimeout,
5337 ]
5338 REQ_BGL = False
5340 def ExpandNames(self):
5341 self._ExpandAndLockInstance()
5342 self.needed_locks[locking.LEVEL_NODE] = []
5343 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5345 def DeclareLocks(self, level):
5346 if level == locking.LEVEL_NODE:
5347 self._LockInstancesNodes()
5349 def BuildHooksEnv(self):
5350 """Build hooks env.
5352 This runs on master, primary and secondary nodes of the instance.
5354 """
5355 instance = self.instance
5356 source_node = instance.primary_node
5357 target_node = instance.secondary_nodes[0]
5358 env = {
5359 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5360 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5361 "OLD_PRIMARY": source_node,
5362 "OLD_SECONDARY": target_node,
5363 "NEW_PRIMARY": target_node,
5364 "NEW_SECONDARY": source_node,
5365 }
5366 env.update(_BuildInstanceHookEnvByObject(self, instance))
5367 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5368 nl_post = list(nl)
5369 nl_post.append(source_node)
5370 return env, nl, nl_post
5372 def CheckPrereq(self):
5373 """Check prerequisites.
5375 This checks that the instance is in the cluster.
5377 """
5378 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5379 assert self.instance is not None, \
5380 "Cannot retrieve locked instance %s" % self.op.instance_name
5382 bep = self.cfg.GetClusterInfo().FillBE(instance)
5383 if instance.disk_template not in constants.DTS_NET_MIRROR:
5384 raise errors.OpPrereqError("Instance's disk layout is not"
5385 " network mirrored, cannot failover.",
5386 errors.ECODE_STATE)
5388 secondary_nodes = instance.secondary_nodes
5389 if not secondary_nodes:
5390 raise errors.ProgrammerError("no secondary node but using "
5391 "a mirrored disk template")
5393 target_node = secondary_nodes[0]
5394 _CheckNodeOnline(self, target_node)
5395 _CheckNodeNotDrained(self, target_node)
5396 if instance.admin_up:
5397 # check memory requirements on the secondary node
5398 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5399 instance.name, bep[constants.BE_MEMORY],
5400 instance.hypervisor)
5401 else:
5402 self.LogInfo("Not checking memory on the secondary node as"
5403 " instance will not be started")
5405 # check bridge existence
5406 _CheckInstanceBridgesExist(self, instance, node=target_node)
5408 def Exec(self, feedback_fn):
5409 """Failover an instance.
5411 The failover is done by shutting it down on its present node and
5412 starting it on the secondary.
5414 """
5415 instance = self.instance
5417 source_node = instance.primary_node
5418 target_node = instance.secondary_nodes[0]
5420 if instance.admin_up:
5421 feedback_fn("* checking disk consistency between source and target")
5422 for dev in instance.disks:
5423 # for drbd, these are drbd over lvm
5424 if not _CheckDiskConsistency(self, dev, target_node, False):
5425 if not self.op.ignore_consistency:
5426 raise errors.OpExecError("Disk %s is degraded on target node,"
5427 " aborting failover." % dev.iv_name)
5428 else:
5429 feedback_fn("* not checking disk consistency as instance is not running")
5431 feedback_fn("* shutting down instance on source node")
5432 logging.info("Shutting down instance %s on node %s",
5433 instance.name, source_node)
5435 result = self.rpc.call_instance_shutdown(source_node, instance,
5436 self.op.shutdown_timeout)
5437 msg = result.fail_msg
5438 if msg:
5439 if self.op.ignore_consistency:
5440 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5441 " Proceeding anyway. Please make sure node"
5442 " %s is down. Error details: %s",
5443 instance.name, source_node, source_node, msg)
5444 else:
5445 raise errors.OpExecError("Could not shutdown instance %s on"
5446 " node %s: %s" %
5447 (instance.name, source_node, msg))
5449 feedback_fn("* deactivating the instance's disks on source node")
5450 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5451 raise errors.OpExecError("Can't shut down the instance's disks.")
5453 instance.primary_node = target_node
5454 # distribute new instance config to the other nodes
5455 self.cfg.Update(instance, feedback_fn)
5457 # Only start the instance if it's marked as up
5458 if instance.admin_up:
5459 feedback_fn("* activating the instance's disks on target node")
5460 logging.info("Starting instance %s on node %s",
5461 instance.name, target_node)
5463 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5464 ignore_secondaries=True)
5465 if not disks_ok:
5466 _ShutdownInstanceDisks(self, instance)
5467 raise errors.OpExecError("Can't activate the instance's disks")
5469 feedback_fn("* starting the instance on the target node")
5470 result = self.rpc.call_instance_start(target_node, instance, None, None)
5471 msg = result.fail_msg
5472 if msg:
5473 _ShutdownInstanceDisks(self, instance)
5474 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5475 (instance.name, target_node, msg))
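# Illustrative sketch, not part of the LU above: the failover sequence as
# performed by Exec, recorded as data for reference only.
_EXAMPLE_FAILOVER_STEPS = [
  "check disk consistency on the secondary (unless ignore_consistency)",
  "shutdown the instance on the old primary",
  "deactivate the instance's disks on the old primary",
  "flip primary_node in the configuration and distribute it",
  "if admin_up: activate disks on the new primary and start the instance",
]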
5478 class LUMigrateInstance(LogicalUnit):
5479 """Migrate an instance.
5481 This is migration without shutting down, compared to the failover,
5482 which is done with shutdown.
5484 """
5485 HPATH = "instance-migrate"
5486 HTYPE = constants.HTYPE_INSTANCE
5487 _OP_PARAMS = [
5488 _PInstanceName,
5489 ("live", True, _TBool),
5490 ("cleanup", False, _TBool),
5491 ]
5492 REQ_BGL = False
5495 def ExpandNames(self):
5496 self._ExpandAndLockInstance()
5498 self.needed_locks[locking.LEVEL_NODE] = []
5499 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5501 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5502 self.op.live, self.op.cleanup)
5503 self.tasklets = [self._migrater]
5505 def DeclareLocks(self, level):
5506 if level == locking.LEVEL_NODE:
5507 self._LockInstancesNodes()
5509 def BuildHooksEnv(self):
5510 """Build hooks env.
5512 This runs on master, primary and secondary nodes of the instance.
5514 """
5515 instance = self._migrater.instance
5516 source_node = instance.primary_node
5517 target_node = instance.secondary_nodes[0]
5518 env = _BuildInstanceHookEnvByObject(self, instance)
5519 env["MIGRATE_LIVE"] = self.op.live
5520 env["MIGRATE_CLEANUP"] = self.op.cleanup
5521 env.update({
5522 "OLD_PRIMARY": source_node,
5523 "OLD_SECONDARY": target_node,
5524 "NEW_PRIMARY": target_node,
5525 "NEW_SECONDARY": source_node,
5526 })
5527 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5528 nl_post = list(nl)
5529 nl_post.append(source_node)
5530 return env, nl, nl_post
5533 class LUMoveInstance(LogicalUnit):
5534 """Move an instance by data-copying.
5536 """
5537 HPATH = "instance-move"
5538 HTYPE = constants.HTYPE_INSTANCE
5539 _OP_PARAMS = [
5540 _PInstanceName,
5541 ("target_node", _NoDefault, _TNonEmptyString),
5542 _PShutdownTimeout,
5543 ]
5544 REQ_BGL = False
5546 def ExpandNames(self):
5547 self._ExpandAndLockInstance()
5548 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5549 self.op.target_node = target_node
5550 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5551 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5553 def DeclareLocks(self, level):
5554 if level == locking.LEVEL_NODE:
5555 self._LockInstancesNodes(primary_only=True)
5557 def BuildHooksEnv(self):
5558 """Build hooks env.
5560 This runs on master, primary and secondary nodes of the instance.
5562 """
5563 env = {
5564 "TARGET_NODE": self.op.target_node,
5565 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5566 }
5567 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5568 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5569 self.op.target_node]
5570 return env, nl, nl
5572 def CheckPrereq(self):
5573 """Check prerequisites.
5575 This checks that the instance is in the cluster.
5577 """
5578 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5579 assert self.instance is not None, \
5580 "Cannot retrieve locked instance %s" % self.op.instance_name
5582 node = self.cfg.GetNodeInfo(self.op.target_node)
5583 assert node is not None, \
5584 "Cannot retrieve locked node %s" % self.op.target_node
5586 self.target_node = target_node = node.name
5588 if target_node == instance.primary_node:
5589 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5590 (instance.name, target_node),
5591 errors.ECODE_STATE)
5593 bep = self.cfg.GetClusterInfo().FillBE(instance)
5595 for idx, dsk in enumerate(instance.disks):
5596 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5597 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5598 " cannot copy" % idx, errors.ECODE_STATE)
5600 _CheckNodeOnline(self, target_node)
5601 _CheckNodeNotDrained(self, target_node)
5603 if instance.admin_up:
5604 # check memory requirements on the secondary node
5605 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5606 instance.name, bep[constants.BE_MEMORY],
5607 instance.hypervisor)
5608 else:
5609 self.LogInfo("Not checking memory on the secondary node as"
5610 " instance will not be started")
5612 # check bridge existence
5613 _CheckInstanceBridgesExist(self, instance, node=target_node)
5615 def Exec(self, feedback_fn):
5616 """Move an instance.
5618 The move is done by shutting it down on its present node, copying
5619 the data over (slow) and starting it on the new node.
5621 """
5622 instance = self.instance
5624 source_node = instance.primary_node
5625 target_node = self.target_node
5627 self.LogInfo("Shutting down instance %s on source node %s",
5628 instance.name, source_node)
5630 result = self.rpc.call_instance_shutdown(source_node, instance,
5631 self.op.shutdown_timeout)
5632 msg = result.fail_msg
5633 if msg:
5634 if self.op.ignore_consistency:
5635 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5636 " Proceeding anyway. Please make sure node"
5637 " %s is down. Error details: %s",
5638 instance.name, source_node, source_node, msg)
5639 else:
5640 raise errors.OpExecError("Could not shutdown instance %s on"
5641 " node %s: %s" %
5642 (instance.name, source_node, msg))
5644 # create the target disks
5645 try:
5646 _CreateDisks(self, instance, target_node=target_node)
5647 except errors.OpExecError:
5648 self.LogWarning("Device creation failed, reverting...")
5649 try:
5650 _RemoveDisks(self, instance, target_node=target_node)
5651 finally:
5652 self.cfg.ReleaseDRBDMinors(instance.name)
5653 raise
5655 cluster_name = self.cfg.GetClusterInfo().cluster_name
5656 errs = []
5658 # activate, get path, copy the data over
5659 for idx, disk in enumerate(instance.disks):
5660 self.LogInfo("Copying data for disk %d", idx)
5661 result = self.rpc.call_blockdev_assemble(target_node, disk,
5662 instance.name, True)
5663 if result.fail_msg:
5664 self.LogWarning("Can't assemble newly created disk %d: %s",
5665 idx, result.fail_msg)
5666 errs.append(result.fail_msg)
5667 else:
5668 dev_path = result.payload
5669 result = self.rpc.call_blockdev_export(source_node, disk,
5670 target_node, dev_path,
5671 cluster_name)
5672 if result.fail_msg:
5673 self.LogWarning("Can't copy data over for disk %d: %s",
5674 idx, result.fail_msg)
5675 errs.append(result.fail_msg)
5676 break
5678 if errs:
5679 self.LogWarning("Some disks failed to copy, aborting")
5680 try:
5681 _RemoveDisks(self, instance, target_node=target_node)
5682 finally:
5683 self.cfg.ReleaseDRBDMinors(instance.name)
5684 raise errors.OpExecError("Errors during disk copy: %s" %
5685 (",".join(errs),))
5687 instance.primary_node = target_node
5688 self.cfg.Update(instance, feedback_fn)
5690 self.LogInfo("Removing the disks on the original node")
5691 _RemoveDisks(self, instance, target_node=source_node)
5693 # Only start the instance if it's marked as up
5694 if instance.admin_up:
5695 self.LogInfo("Starting instance %s on node %s",
5696 instance.name, target_node)
5698 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5699 ignore_secondaries=True)
5700 if not disks_ok:
5701 _ShutdownInstanceDisks(self, instance)
5702 raise errors.OpExecError("Can't activate the instance's disks")
5704 result = self.rpc.call_instance_start(target_node, instance, None, None)
5705 msg = result.fail_msg
5706 if msg:
5707 _ShutdownInstanceDisks(self, instance)
5708 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5709 (instance.name, target_node, msg))
5712 class LUMigrateNode(LogicalUnit):
5713 """Migrate all instances from a node.
5715 """
5716 HPATH = "node-migrate"
5717 HTYPE = constants.HTYPE_NODE
5718 _OP_PARAMS = [
5719 _PNodeName,
5720 ("live", False, _TBool),
5721 ]
5722 REQ_BGL = False
5724 def ExpandNames(self):
5725 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5727 self.needed_locks = {
5728 locking.LEVEL_NODE: [self.op.node_name],
5729 }
5731 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5733 # Create tasklets for migrating instances for all instances on this node
5734 names = []
5735 tasklets = []
5737 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5738 logging.debug("Migrating instance %s", inst.name)
5739 names.append(inst.name)
5741 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5743 self.tasklets = tasklets
5745 # Declare instance locks
5746 self.needed_locks[locking.LEVEL_INSTANCE] = names
5748 def DeclareLocks(self, level):
5749 if level == locking.LEVEL_NODE:
5750 self._LockInstancesNodes()
5752 def BuildHooksEnv(self):
5753 """Build hooks env.
5755 This runs on the master, the primary and all the secondaries.
5757 """
5758 env = {
5759 "NODE_NAME": self.op.node_name,
5760 }
5762 nl = [self.cfg.GetMasterNode()]
5764 return (env, nl, nl)
5767 class TLMigrateInstance(Tasklet):
5768 def __init__(self, lu, instance_name, live, cleanup):
5769 """Initializes this class.
5771 """
5772 Tasklet.__init__(self, lu)
5775 self.instance_name = instance_name
5776 self.live = live
5777 self.cleanup = cleanup
5779 def CheckPrereq(self):
5780 """Check prerequisites.
5782 This checks that the instance is in the cluster.
5784 """
5785 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5786 instance = self.cfg.GetInstanceInfo(instance_name)
5787 assert instance is not None
5789 if instance.disk_template != constants.DT_DRBD8:
5790 raise errors.OpPrereqError("Instance's disk layout is not"
5791 " drbd8, cannot migrate.", errors.ECODE_STATE)
5793 secondary_nodes = instance.secondary_nodes
5794 if not secondary_nodes:
5795 raise errors.ConfigurationError("No secondary node but using"
5796 " drbd8 disk template")
5798 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5800 target_node = secondary_nodes[0]
5801 # check memory requirements on the secondary node
5802 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5803 instance.name, i_be[constants.BE_MEMORY],
5804 instance.hypervisor)
5806 # check bridge existence
5807 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5809 if not self.cleanup:
5810 _CheckNodeNotDrained(self.lu, target_node)
5811 result = self.rpc.call_instance_migratable(instance.primary_node,
5812 instance)
5813 result.Raise("Can't migrate, please use failover",
5814 prereq=True, ecode=errors.ECODE_STATE)
5816 self.instance = instance
5818 def _WaitUntilSync(self):
5819 """Poll with custom rpc for disk sync.
5821 This uses our own step-based rpc call.
5823 """
5824 self.feedback_fn("* wait until resync is done")
5825 all_done = False
5826 while not all_done:
5827 all_done = True
5828 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5829 self.nodes_ip,
5830 self.instance.disks)
5831 min_percent = 100
5832 for node, nres in result.items():
5833 nres.Raise("Cannot resync disks on node %s" % node)
5834 node_done, node_percent = nres.payload
5835 all_done = all_done and node_done
5836 if node_percent is not None:
5837 min_percent = min(min_percent, node_percent)
5838 if not all_done:
5839 if min_percent < 100:
5840 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5841 time.sleep(2)
5843 def _EnsureSecondary(self, node):
5844 """Demote a node to secondary.
5846 """
5847 self.feedback_fn("* switching node %s to secondary mode" % node)
5849 for dev in self.instance.disks:
5850 self.cfg.SetDiskID(dev, node)
5852 result = self.rpc.call_blockdev_close(node, self.instance.name,
5853 self.instance.disks)
5854 result.Raise("Cannot change disk to secondary on node %s" % node)
5856 def _GoStandalone(self):
5857 """Disconnect from the network.
5859 """
5860 self.feedback_fn("* changing into standalone mode")
5861 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5862 self.instance.disks)
5863 for node, nres in result.items():
5864 nres.Raise("Cannot disconnect disks node %s" % node)
5866 def _GoReconnect(self, multimaster):
5867 """Reconnect to the network.
5869 """
5870 if multimaster:
5871 msg = "dual-master"
5872 else:
5873 msg = "single-master"
5874 self.feedback_fn("* changing disks into %s mode" % msg)
5875 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5876 self.instance.disks,
5877 self.instance.name, multimaster)
5878 for node, nres in result.items():
5879 nres.Raise("Cannot change disks config on node %s" % node)
5881 def _ExecCleanup(self):
5882 """Try to cleanup after a failed migration.
5884 The cleanup is done by:
5885 - check that the instance is running only on one node
5886 (and update the config if needed)
5887 - change disks on its secondary node to secondary
5888 - wait until disks are fully synchronized
5889 - disconnect from the network
5890 - change disks into single-master mode
5891 - wait again until disks are fully synchronized
5894 instance = self.instance
5895 target_node = self.target_node
5896 source_node = self.source_node
5898 # check running on only one node
5899 self.feedback_fn("* checking where the instance actually runs"
5900 " (if this hangs, the hypervisor might be in"
5902 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5903 for node, result in ins_l.items():
5904 result.Raise("Can't contact node %s" % node)
5906 runningon_source = instance.name in ins_l[source_node].payload
5907 runningon_target = instance.name in ins_l[target_node].payload
5909 if runningon_source and runningon_target:
5910 raise errors.OpExecError("Instance seems to be running on two nodes,"
5911 " or the hypervisor is confused. You will have"
5912 " to ensure manually that it runs only on one"
5913 " and restart this operation.")
5915 if not (runningon_source or runningon_target):
5916 raise errors.OpExecError("Instance does not seem to be running at all."
5917 " In this case, it's safer to repair by"
5918 " running 'gnt-instance stop' to ensure disk"
5919 " shutdown, and then restarting it.")
5921 if runningon_target:
5922 # the migration has actually succeeded, we need to update the config
5923 self.feedback_fn("* instance running on secondary node (%s),"
5924 " updating config" % target_node)
5925 instance.primary_node = target_node
5926 self.cfg.Update(instance, self.feedback_fn)
5927 demoted_node = source_node
5929 self.feedback_fn("* instance confirmed to be running on its"
5930 " primary node (%s)" % source_node)
5931 demoted_node = target_node
5933 self._EnsureSecondary(demoted_node)
try:
  self._WaitUntilSync()
5936 except errors.OpExecError:
# we ignore errors here, since if the device is standalone, it
# won't be able to sync
pass
5940 self._GoStandalone()
5941 self._GoReconnect(False)
5942 self._WaitUntilSync()
5944 self.feedback_fn("* done")
5946 def _RevertDiskStatus(self):
5947 """Try to revert the disk status after a failed migration.
5950 target_node = self.target_node
5952 self._EnsureSecondary(target_node)
5953 self._GoStandalone()
5954 self._GoReconnect(False)
5955 self._WaitUntilSync()
5956 except errors.OpExecError, err:
5957 self.lu.LogWarning("Migration failed and I can't reconnect the"
5958 " drives: error '%s'\n"
5959 "Please look and recover the instance status" %
5962 def _AbortMigration(self):
5963 """Call the hypervisor code to abort a started migration.
5966 instance = self.instance
5967 target_node = self.target_node
5968 migration_info = self.migration_info
abort_result = self.rpc.call_finalize_migration(target_node,
                                                instance,
                                                migration_info,
                                                False)
5974 abort_msg = abort_result.fail_msg
5976 logging.error("Aborting migration failed on target node %s: %s",
5977 target_node, abort_msg)
# Don't raise an exception here, as we still have to try to revert the
# disk status, even if this step failed.
5981 def _ExecMigration(self):
5982 """Migrate an instance.
5984 The migrate is done by:
5985 - change the disks into dual-master mode
5986 - wait until disks are fully synchronized again
5987 - migrate the instance
5988 - change disks on the new secondary node (the old primary) to secondary
5989 - wait until disks are fully synchronized
5990 - change disks into single-master mode
5993 instance = self.instance
5994 target_node = self.target_node
5995 source_node = self.source_node
5997 self.feedback_fn("* checking disk consistency between source and target")
5998 for dev in instance.disks:
5999 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6000 raise errors.OpExecError("Disk %s is degraded or not fully"
6001 " synchronized on target node,"
6002 " aborting migrate." % dev.iv_name)
6004 # First get the migration information from the remote node
6005 result = self.rpc.call_migration_info(source_node, instance)
6006 msg = result.fail_msg
if msg:
  log_err = ("Failed fetching source migration information from %s: %s" %
             (source_node, msg))
6010 logging.error(log_err)
6011 raise errors.OpExecError(log_err)
6013 self.migration_info = migration_info = result.payload
6015 # Then switch the disks to master/master mode
6016 self._EnsureSecondary(target_node)
6017 self._GoStandalone()
6018 self._GoReconnect(True)
6019 self._WaitUntilSync()
6021 self.feedback_fn("* preparing %s to accept the instance" % target_node)
result = self.rpc.call_accept_instance(target_node,
                                       instance,
                                       migration_info,
                                       self.nodes_ip[target_node])
6027 msg = result.fail_msg
6029 logging.error("Instance pre-migration failed, trying to revert"
6030 " disk status: %s", msg)
6031 self.feedback_fn("Pre-migration failed, aborting")
6032 self._AbortMigration()
6033 self._RevertDiskStatus()
6034 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6035 (instance.name, msg))
6037 self.feedback_fn("* migrating instance to %s" % target_node)
6039 result = self.rpc.call_instance_migrate(source_node, instance,
self.nodes_ip[target_node],
self.live)
6042 msg = result.fail_msg
6044 logging.error("Instance migration failed, trying to revert"
6045 " disk status: %s", msg)
6046 self.feedback_fn("Migration failed, aborting")
6047 self._AbortMigration()
6048 self._RevertDiskStatus()
6049 raise errors.OpExecError("Could not migrate instance %s: %s" %
6050 (instance.name, msg))
6053 instance.primary_node = target_node
6054 # distribute new instance config to the other nodes
6055 self.cfg.Update(instance, self.feedback_fn)
result = self.rpc.call_finalize_migration(target_node,
                                          instance,
                                          migration_info,
                                          True)
6061 msg = result.fail_msg
6063 logging.error("Instance migration succeeded, but finalization failed:"
6065 raise errors.OpExecError("Could not finalize instance migration: %s" %
6068 self._EnsureSecondary(source_node)
6069 self._WaitUntilSync()
6070 self._GoStandalone()
6071 self._GoReconnect(False)
6072 self._WaitUntilSync()
6074 self.feedback_fn("* done")
6076 def Exec(self, feedback_fn):
6077 """Perform the migration.
6080 feedback_fn("Migrating instance %s" % self.instance.name)
6082 self.feedback_fn = feedback_fn
6084 self.source_node = self.instance.primary_node
6085 self.target_node = self.instance.secondary_nodes[0]
6086 self.all_nodes = [self.source_node, self.target_node]
self.nodes_ip = {
  self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
  self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
  }
if self.cleanup:
  return self._ExecCleanup()
else:
  return self._ExecMigration()
6098 def _CreateBlockDev(lu, node, instance, device, force_create,
6100 """Create a tree of block devices on a given node.
If this device type has to be created on secondaries, create it and
all its children.
6105 If not, just recurse to children keeping the same 'force' value.
6107 @param lu: the lu on whose behalf we execute
6108 @param node: the node on which to create the device
6109 @type instance: L{objects.Instance}
6110 @param instance: the instance which owns the device
6111 @type device: L{objects.Disk}
6112 @param device: the device to create
6113 @type force_create: boolean
@param force_create: whether to force creation of this device; this
will be changed to True whenever we find a device that has the
CreateOnSecondary() attribute
6117 @param info: the extra 'metadata' we should attach to the device
6118 (this will be represented as a LVM tag)
6119 @type force_open: boolean
@param force_open: this parameter will be passed to the
L{backend.BlockdevCreate} function where it specifies
whether we run on primary or not, and it affects both
the child assembly and the device's own Open() execution
if device.CreateOnSecondary():
  force_create = True

if device.children:
6130 for child in device.children:
6131 _CreateBlockDev(lu, node, instance, child, force_create,
if not force_create:
  return
6137 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
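# Sketch of the force_create propagation implemented above: once a
# device in the tree reports CreateOnSecondary(), the flag stays True
# for that device and its whole subtree. Self-contained toy example
# (the _ExampleDevice class is hypothetical):
class _ExampleDevice(object):
  def __init__(self, name, on_secondary, children=None):
    self.name = name
    self.on_secondary = on_secondary
    self.children = children or []

def _ExampleCollectCreates(device, force_create, created):
  """Recurse like _CreateBlockDev, recording which devices get created."""
  if device.on_secondary:
    force_create = True
  for child in device.children:
    _ExampleCollectCreates(child, force_create, created)
  if force_create:
    created.append(device.name)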
6140 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6141 """Create a single block device on a given node.
This will not recurse over children of the device, so they must be
created in advance.
6146 @param lu: the lu on whose behalf we execute
6147 @param node: the node on which to create the device
6148 @type instance: L{objects.Instance}
6149 @param instance: the instance which owns the device
6150 @type device: L{objects.Disk}
6151 @param device: the device to create
6152 @param info: the extra 'metadata' we should attach to the device
6153 (this will be represented as a LVM tag)
6154 @type force_open: boolean
@param force_open: this parameter will be passed to the
L{backend.BlockdevCreate} function where it specifies
whether we run on primary or not, and it affects both
the child assembly and the device's own Open() execution
6161 lu.cfg.SetDiskID(device, node)
6162 result = lu.rpc.call_blockdev_create(node, device, device.size,
6163 instance.name, force_open, info)
6164 result.Raise("Can't create block device %s on"
6165 " node %s for instance %s" % (device, node, instance.name))
6166 if device.physical_id is None:
6167 device.physical_id = result.payload
6170 def _GenerateUniqueNames(lu, exts):
6171 """Generate a suitable LV name.
6173 This will generate a logical volume name for the given instance.
results = []
for val in exts:
  new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
results.append("%s%s" % (new_id, val))
return results
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
6185 """Generate a drbd8 device complete with its children.
6188 port = lu.cfg.AllocatePort()
6189 vgname = lu.cfg.GetVGName()
6190 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6191 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6192 logical_id=(vgname, names[0]))
6193 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6194 logical_id=(vgname, names[1]))
6195 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6196 logical_id=(primary, secondary, port,
p_minor, s_minor,
shared_secret),
children=[dev_data, dev_meta],
iv_name=iv_name)
return drbd_dev
6204 def _GenerateDiskTemplate(lu, template_name,
6205 instance_name, primary_node,
6206 secondary_nodes, disk_info,
6207 file_storage_dir, file_driver,
6209 """Generate the entire disk layout for a given template type.
6212 #TODO: compute space requirements
6214 vgname = lu.cfg.GetVGName()
6215 disk_count = len(disk_info)
disks = []
if template_name == constants.DT_DISKLESS:
  pass
6219 elif template_name == constants.DT_PLAIN:
6220 if len(secondary_nodes) != 0:
6221 raise errors.ProgrammerError("Wrong template configuration")
6223 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6224 for i in range(disk_count)])
6225 for idx, disk in enumerate(disk_info):
6226 disk_index = idx + base_index
6227 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6228 logical_id=(vgname, names[idx]),
iv_name="disk/%d" % disk_index,
mode=disk["mode"])
6231 disks.append(disk_dev)
6232 elif template_name == constants.DT_DRBD8:
6233 if len(secondary_nodes) != 1:
6234 raise errors.ProgrammerError("Wrong template configuration")
6235 remote_node = secondary_nodes[0]
6236 minors = lu.cfg.AllocateDRBDMinor(
6237 [primary_node, remote_node] * len(disk_info), instance_name)
names = []
for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6241 for i in range(disk_count)]):
6242 names.append(lv_prefix + "_data")
6243 names.append(lv_prefix + "_meta")
6244 for idx, disk in enumerate(disk_info):
6245 disk_index = idx + base_index
6246 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6247 disk["size"], names[idx*2:idx*2+2],
6248 "disk/%d" % disk_index,
6249 minors[idx*2], minors[idx*2+1])
6250 disk_dev.mode = disk["mode"]
6251 disks.append(disk_dev)
6252 elif template_name == constants.DT_FILE:
6253 if len(secondary_nodes) != 0:
6254 raise errors.ProgrammerError("Wrong template configuration")
6256 _RequireFileStorage()
6258 for idx, disk in enumerate(disk_info):
6259 disk_index = idx + base_index
6260 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6261 iv_name="disk/%d" % disk_index,
6262 logical_id=(file_driver,
6263 "%s/disk%d" % (file_storage_dir,
6266 disks.append(disk_dev)
6268 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6272 def _GetInstanceInfoText(instance):
6273 """Compute that text that should be added to the disk's metadata.
6276 return "originstname+%s" % instance.name
6279 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6280 """Create all disks for an instance.
6282 This abstracts away some work from AddInstance.
6284 @type lu: L{LogicalUnit}
6285 @param lu: the logical unit on whose behalf we execute
6286 @type instance: L{objects.Instance}
6287 @param instance: the instance whose disks we should create
6289 @param to_skip: list of indices to skip
6290 @type target_node: string
6291 @param target_node: if passed, overrides the target node for creation
6293 @return: the success of the creation
6296 info = _GetInstanceInfoText(instance)
6297 if target_node is None:
6298 pnode = instance.primary_node
all_nodes = instance.all_nodes
else:
  pnode = target_node
  all_nodes = [pnode]
6304 if instance.disk_template == constants.DT_FILE:
6305 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6306 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6308 result.Raise("Failed to create directory '%s' on"
6309 " node %s" % (file_storage_dir, pnode))
6311 # Note: this needs to be kept in sync with adding of disks in
6312 # LUSetInstanceParams
6313 for idx, device in enumerate(instance.disks):
if to_skip and idx in to_skip:
  continue
6316 logging.info("Creating volume %s for instance %s",
6317 device.iv_name, instance.name)
6319 for node in all_nodes:
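# f_create doubles as both force_create and force_open below: only the
# primary node forces creation and opens the device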
6320 f_create = node == pnode
6321 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6324 def _RemoveDisks(lu, instance, target_node=None):
6325 """Remove all disks for an instance.
6327 This abstracts away some work from `AddInstance()` and
6328 `RemoveInstance()`. Note that in case some of the devices couldn't
6329 be removed, the removal will continue with the other ones (compare
6330 with `_CreateDisks()`).
6332 @type lu: L{LogicalUnit}
6333 @param lu: the logical unit on whose behalf we execute
6334 @type instance: L{objects.Instance}
6335 @param instance: the instance whose disks we should remove
6336 @type target_node: string
6337 @param target_node: used to override the node on which to remove the disks
6339 @return: the success of the removal
6342 logging.info("Removing block devices for instance %s", instance.name)
6345 for device in instance.disks:
if target_node:
  edata = [(target_node, device)]
else:
  edata = device.ComputeNodeTree(instance.primary_node)
6350 for node, disk in edata:
6351 lu.cfg.SetDiskID(disk, node)
6352 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6354 lu.LogWarning("Could not remove block device %s on node %s,"
6355 " continuing anyway: %s", device.iv_name, node, msg)
6358 if instance.disk_template == constants.DT_FILE:
6359 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
if target_node:
  tgt = target_node
else:
  tgt = instance.primary_node
6364 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6366 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6367 file_storage_dir, instance.primary_node, result.fail_msg)
6373 def _ComputeDiskSize(disk_template, disks):
6374 """Compute disk size requirements in the volume group
6377 # Required free disk space as a function of disk and swap space
req_size_dict = {
  constants.DT_DISKLESS: None,
  constants.DT_PLAIN: sum(d["size"] for d in disks),
  # 128 MB are added for drbd metadata for each disk
  constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
  constants.DT_FILE: None,
  }
6386 if disk_template not in req_size_dict:
6387 raise errors.ProgrammerError("Disk template '%s' size requirement"
6388 " is unknown" % disk_template)
6390 return req_size_dict[disk_template]
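# Worked example for the computation above: two disks of 1024 and 512 MB
# under DT_DRBD8 need (1024 + 128) + (512 + 128) = 1792 MB of free space
# in the volume group, while DT_FILE and DT_DISKLESS need none:
#   _ComputeDiskSize(constants.DT_DRBD8,
#                    [{"size": 1024}, {"size": 512}]) == 1792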
6393 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6394 """Hypervisor parameter validation.
This function abstracts the hypervisor parameter validation to be
6397 used in both instance create and instance modify.
6399 @type lu: L{LogicalUnit}
6400 @param lu: the logical unit for which we check
6401 @type nodenames: list
6402 @param nodenames: the list of nodes on which we should check
6403 @type hvname: string
6404 @param hvname: the name of the hypervisor we should use
6405 @type hvparams: dict
6406 @param hvparams: the parameters which we need to check
6407 @raise errors.OpPrereqError: if the parameters are not valid
hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                hvname,
                                                hvparams)
6413 for node in nodenames:
info = hvinfo[node]
if info.offline:
  continue
info.Raise("Hypervisor parameter validation failed on node %s" % node)
6420 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6421 """OS parameters validation.
6423 @type lu: L{LogicalUnit}
6424 @param lu: the logical unit for which we check
6425 @type required: boolean
@param required: whether the validation should fail if the OS is not
found
6428 @type nodenames: list
6429 @param nodenames: the list of nodes on which we should check
6430 @type osname: string
@param osname: the name of the OS we should use
6432 @type osparams: dict
6433 @param osparams: the parameters which we need to check
6434 @raise errors.OpPrereqError: if the parameters are not valid
6437 result = lu.rpc.call_os_validate(required, nodenames, osname,
6438 [constants.OS_VALIDATE_PARAMETERS],
6440 for node, nres in result.items():
6441 # we don't check for offline cases since this should be run only
6442 # against the master node and/or an instance's nodes
6443 nres.Raise("OS Parameters validation failed on node %s" % node)
6444 if not nres.payload:
6445 lu.LogInfo("OS %s not found on node %s, validation skipped",
6449 class LUCreateInstance(LogicalUnit):
6450 """Create an instance.
6453 HPATH = "instance-add"
6454 HTYPE = constants.HTYPE_INSTANCE
6457 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6458 ("start", True, _TBool),
6459 ("wait_for_sync", True, _TBool),
6460 ("ip_check", True, _TBool),
6461 ("name_check", True, _TBool),
6462 ("disks", _NoDefault, _TListOf(_TDict)),
6463 ("nics", _NoDefault, _TListOf(_TDict)),
6464 ("hvparams", _EmptyDict, _TDict),
6465 ("beparams", _EmptyDict, _TDict),
6466 ("osparams", _EmptyDict, _TDict),
6467 ("no_install", None, _TMaybeBool),
6468 ("os_type", None, _TMaybeString),
6469 ("force_variant", False, _TBool),
6470 ("source_handshake", None, _TOr(_TList, _TNone)),
6471 ("source_x509_ca", None, _TOr(_TList, _TNone)),
6472 ("source_instance_name", None, _TMaybeString),
6473 ("src_node", None, _TMaybeString),
6474 ("src_path", None, _TMaybeString),
6475 ("pnode", None, _TMaybeString),
6476 ("snode", None, _TMaybeString),
6477 ("iallocator", None, _TMaybeString),
6478 ("hypervisor", None, _TMaybeString),
6479 ("disk_template", _NoDefault, _CheckDiskTemplate),
6480 ("identify_defaults", False, _TBool),
6481 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6482 ("file_storage_dir", None, _TMaybeString),
6483 ("dry_run", False, _TBool),
6487 def CheckArguments(self):
6491 # do not require name_check to ease forward/backward compatibility
6493 if self.op.no_install and self.op.start:
6494 self.LogInfo("No-installation mode selected, disabling startup")
6495 self.op.start = False
6496 # validate/normalize the instance name
6497 self.op.instance_name = \
6498 netutils.HostInfo.NormalizeName(self.op.instance_name)
6500 if self.op.ip_check and not self.op.name_check:
6501 # TODO: make the ip check more flexible and not depend on the name check
6502 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6505 # check nics' parameter names
6506 for nic in self.op.nics:
6507 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6509 # check disks. parameter names and consistent adopt/no-adopt strategy
6510 has_adopt = has_no_adopt = False
6511 for disk in self.op.disks:
6512 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6517 if has_adopt and has_no_adopt:
6518 raise errors.OpPrereqError("Either all disks are adopted or none is",
if has_adopt:
  if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6522 raise errors.OpPrereqError("Disk adoption is not supported for the"
6523 " '%s' disk template" %
6524 self.op.disk_template,
6526 if self.op.iallocator is not None:
6527 raise errors.OpPrereqError("Disk adoption not allowed with an"
6528 " iallocator script", errors.ECODE_INVAL)
6529 if self.op.mode == constants.INSTANCE_IMPORT:
6530 raise errors.OpPrereqError("Disk adoption not allowed for"
6531 " instance import", errors.ECODE_INVAL)
6533 self.adopt_disks = has_adopt
6535 # instance name verification
6536 if self.op.name_check:
6537 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6538 self.op.instance_name = self.hostname1.name
6539 # used in CheckPrereq for ip ping check
6540 self.check_ip = self.hostname1.ip
6541 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6542 raise errors.OpPrereqError("Remote imports require names to be checked" %
else:
  self.check_ip = None
6547 # file storage checks
6548 if (self.op.file_driver and
6549 not self.op.file_driver in constants.FILE_DRIVER):
6550 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6551 self.op.file_driver, errors.ECODE_INVAL)
6553 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6554 raise errors.OpPrereqError("File storage directory path not absolute",
6557 ### Node/iallocator related checks
6558 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6560 self._cds = _GetClusterDomainSecret()
6562 if self.op.mode == constants.INSTANCE_IMPORT:
6563 # On import force_variant must be True, because if we forced it at
# initial install, our only chance when importing it back is that it
# works again!
6566 self.op.force_variant = True
6568 if self.op.no_install:
6569 self.LogInfo("No-installation mode has no effect during import")
6571 elif self.op.mode == constants.INSTANCE_CREATE:
6572 if self.op.os_type is None:
6573 raise errors.OpPrereqError("No guest OS specified",
6575 if self.op.disk_template is None:
6576 raise errors.OpPrereqError("No disk template specified",
6579 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6580 # Check handshake to ensure both clusters have the same domain secret
6581 src_handshake = self.op.source_handshake
6582 if not src_handshake:
6583 raise errors.OpPrereqError("Missing source handshake",
errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                     src_handshake)
6589 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6592 # Load and check source CA
6593 self.source_x509_ca_pem = self.op.source_x509_ca
6594 if not self.source_x509_ca_pem:
6595 raise errors.OpPrereqError("Missing source X509 CA",
try:
  (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                              self._cds)
6601 except OpenSSL.crypto.Error, err:
6602 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6603 (err, ), errors.ECODE_INVAL)
6605 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6606 if errcode is not None:
6607 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6610 self.source_x509_ca = cert
6612 src_instance_name = self.op.source_instance_name
6613 if not src_instance_name:
6614 raise errors.OpPrereqError("Missing source instance name",
6617 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6618 self.source_instance_name = netutils.GetHostInfo(norm_name).name
6621 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6622 self.op.mode, errors.ECODE_INVAL)
6624 def ExpandNames(self):
6625 """ExpandNames for CreateInstance.
6627 Figure out the right locks for instance creation.
6630 self.needed_locks = {}
6632 instance_name = self.op.instance_name
6633 # this is just a preventive check, but someone might still add this
6634 # instance in the meantime, and creation will fail at lock-add time
6635 if instance_name in self.cfg.GetInstanceList():
6636 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6637 instance_name, errors.ECODE_EXISTS)
6639 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6641 if self.op.iallocator:
6642 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6644 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6645 nodelist = [self.op.pnode]
6646 if self.op.snode is not None:
6647 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6648 nodelist.append(self.op.snode)
6649 self.needed_locks[locking.LEVEL_NODE] = nodelist
6651 # in case of import lock the source node too
6652 if self.op.mode == constants.INSTANCE_IMPORT:
6653 src_node = self.op.src_node
6654 src_path = self.op.src_path
6656 if src_path is None:
6657 self.op.src_path = src_path = self.op.instance_name
6659 if src_node is None:
6660 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6661 self.op.src_node = None
6662 if os.path.isabs(src_path):
6663 raise errors.OpPrereqError("Importing an instance from an absolute"
6664 " path requires a source node option.",
6667 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6668 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6669 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6670 if not os.path.isabs(src_path):
6671 self.op.src_path = src_path = \
6672 utils.PathJoin(constants.EXPORT_DIR, src_path)
6674 def _RunAllocator(self):
6675 """Run the allocator based on input opcode.
6678 nics = [n.ToDict() for n in self.nics]
6679 ial = IAllocator(self.cfg, self.rpc,
6680 mode=constants.IALLOCATOR_MODE_ALLOC,
6681 name=self.op.instance_name,
6682 disk_template=self.op.disk_template,
6685 vcpus=self.be_full[constants.BE_VCPUS],
6686 mem_size=self.be_full[constants.BE_MEMORY],
6689 hypervisor=self.op.hypervisor,
6692 ial.Run(self.op.iallocator)
6695 raise errors.OpPrereqError("Can't compute nodes using"
6696 " iallocator '%s': %s" %
6697 (self.op.iallocator, ial.info),
6699 if len(ial.result) != ial.required_nodes:
6700 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6701 " of nodes (%s), required %s" %
6702 (self.op.iallocator, len(ial.result),
6703 ial.required_nodes), errors.ECODE_FAULT)
6704 self.op.pnode = ial.result[0]
6705 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6706 self.op.instance_name, self.op.iallocator,
6707 utils.CommaJoin(ial.result))
6708 if ial.required_nodes == 2:
6709 self.op.snode = ial.result[1]
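# Summary of the allocator handling above: a successful run must return
# exactly ial.required_nodes names; the first becomes the primary node
# and, when two nodes are required (mirrored templates), the second
# becomes the secondary. Illustrative values only:
#   ial.result == ["node1.example.com", "node2.example.com"]
#   self.op.pnode, self.op.snode = ial.result[0], ial.result[1]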
6711 def BuildHooksEnv(self):
6714 This runs on master, primary and secondary nodes of the instance.
6718 "ADD_MODE": self.op.mode,
6720 if self.op.mode == constants.INSTANCE_IMPORT:
6721 env["SRC_NODE"] = self.op.src_node
6722 env["SRC_PATH"] = self.op.src_path
6723 env["SRC_IMAGES"] = self.src_images
6725 env.update(_BuildInstanceHookEnv(
6726 name=self.op.instance_name,
6727 primary_node=self.op.pnode,
6728 secondary_nodes=self.secondaries,
6729 status=self.op.start,
6730 os_type=self.op.os_type,
6731 memory=self.be_full[constants.BE_MEMORY],
6732 vcpus=self.be_full[constants.BE_VCPUS],
6733 nics=_NICListToTuple(self, self.nics),
6734 disk_template=self.op.disk_template,
6735 disks=[(d["size"], d["mode"]) for d in self.disks],
bep=self.be_full,
hvp=self.hv_full,
hypervisor_name=self.op.hypervisor,
))
nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
      self.secondaries)
return env, nl, nl
6745 def _ReadExportInfo(self):
6746 """Reads the export information from disk.
6748 It will override the opcode source node and path with the actual
6749 information, if these two were not specified before.
6751 @return: the export information
6754 assert self.op.mode == constants.INSTANCE_IMPORT
6756 src_node = self.op.src_node
6757 src_path = self.op.src_path
6759 if src_node is None:
6760 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6761 exp_list = self.rpc.call_export_list(locked_nodes)
6763 for node in exp_list:
if exp_list[node].fail_msg:
  continue
6766 if src_path in exp_list[node].payload:
6768 self.op.src_node = src_node = node
6769 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6773 raise errors.OpPrereqError("No export found for relative path %s" %
6774 src_path, errors.ECODE_INVAL)
6776 _CheckNodeOnline(self, src_node)
6777 result = self.rpc.call_export_info(src_node, src_path)
6778 result.Raise("No export or invalid export found in dir %s" % src_path)
6780 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6781 if not export_info.has_section(constants.INISECT_EXP):
6782 raise errors.ProgrammerError("Corrupted export config",
6783 errors.ECODE_ENVIRON)
6785 ei_version = export_info.get(constants.INISECT_EXP, "version")
if int(ei_version) != constants.EXPORT_VERSION:
6787 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6788 (ei_version, constants.EXPORT_VERSION),
errors.ECODE_ENVIRON)

return export_info
6792 def _ReadExportParams(self, einfo):
6793 """Use export parameters as defaults.
6795 In case the opcode doesn't specify (as in override) some instance
parameters, then try to use them from the export information, if
the export declares them.
if self.op.os_type is None:
  self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6802 if self.op.disk_template is None:
6803 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6804 self.op.disk_template = einfo.get(constants.INISECT_INS,
6807 raise errors.OpPrereqError("No disk template specified and the export"
6808 " is missing the disk_template information",
6811 if not self.op.disks:
6812 if einfo.has_option(constants.INISECT_INS, "disk_count"):
disks = []
# TODO: import the disk iv_name too
6815 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6816 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6817 disks.append({"size": disk_sz})
6818 self.op.disks = disks
6820 raise errors.OpPrereqError("No disk info specified and the export"
6821 " is missing the disk information",
6824 if (not self.op.nics and
6825 einfo.has_option(constants.INISECT_INS, "nic_count")):
nics = []
for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
  ndict = {}
6829 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
ndict[name] = v
nics.append(ndict)
self.op.nics = nics
6835 if (self.op.hypervisor is None and
6836 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6837 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6838 if einfo.has_section(constants.INISECT_HYP):
6839 # use the export parameters but do not override the ones
6840 # specified by the user
6841 for name, value in einfo.items(constants.INISECT_HYP):
6842 if name not in self.op.hvparams:
6843 self.op.hvparams[name] = value
6845 if einfo.has_section(constants.INISECT_BEP):
6846 # use the parameters, without overriding
6847 for name, value in einfo.items(constants.INISECT_BEP):
6848 if name not in self.op.beparams:
6849 self.op.beparams[name] = value
6851 # try to read the parameters old style, from the main section
6852 for name in constants.BES_PARAMETERS:
6853 if (name not in self.op.beparams and
6854 einfo.has_option(constants.INISECT_INS, name)):
6855 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6857 if einfo.has_section(constants.INISECT_OSP):
6858 # use the parameters, without overriding
6859 for name, value in einfo.items(constants.INISECT_OSP):
6860 if name not in self.op.osparams:
6861 self.op.osparams[name] = value
6863 def _RevertToDefaults(self, cluster):
6864 """Revert the instance parameters to the default values.
6868 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6869 for name in self.op.hvparams.keys():
6870 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6871 del self.op.hvparams[name]
6873 be_defs = cluster.SimpleFillBE({})
6874 for name in self.op.beparams.keys():
6875 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6876 del self.op.beparams[name]
6878 nic_defs = cluster.SimpleFillNIC({})
6879 for nic in self.op.nics:
6880 for name in constants.NICS_PARAMETERS:
if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
  del nic[name]
6884 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6885 for name in self.op.osparams.keys():
6886 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6887 del self.op.osparams[name]
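# All four reverts above follow one pattern: drop any explicitly given
# parameter whose value matches the cluster default, so only real
# overrides end up stored on the instance. Self-contained sketch
# (hypothetical helper):
def _ExamplePruneDefaults(params, defaults):
  """Remove entries from params whose value equals the default."""
  for name in params.keys():
    if name in defaults and defaults[name] == params[name]:
      del params[name]
# e.g. with defaults {"kernel": "vmlinuz"}, the dict
# {"kernel": "vmlinuz", "memory": 512} shrinks to {"memory": 512}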
6889 def CheckPrereq(self):
6890 """Check prerequisites.
6893 if self.op.mode == constants.INSTANCE_IMPORT:
6894 export_info = self._ReadExportInfo()
6895 self._ReadExportParams(export_info)
6897 _CheckDiskTemplate(self.op.disk_template)
6899 if (not self.cfg.GetVGName() and
6900 self.op.disk_template not in constants.DTS_NOT_LVM):
6901 raise errors.OpPrereqError("Cluster does not support lvm-based"
6902 " instances", errors.ECODE_STATE)
6904 if self.op.hypervisor is None:
6905 self.op.hypervisor = self.cfg.GetHypervisorType()
6907 cluster = self.cfg.GetClusterInfo()
6908 enabled_hvs = cluster.enabled_hypervisors
6909 if self.op.hypervisor not in enabled_hvs:
6910 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6911 " cluster (%s)" % (self.op.hypervisor,
6912 ",".join(enabled_hvs)),
6915 # check hypervisor parameter syntax (locally)
6916 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6917 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6919 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6920 hv_type.CheckParameterSyntax(filled_hvp)
6921 self.hv_full = filled_hvp
6922 # check that we don't specify global parameters on an instance
6923 _CheckGlobalHvParams(self.op.hvparams)
6925 # fill and remember the beparams dict
6926 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6927 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6929 # build os parameters
6930 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
# now that hvp/bep are in final format, let's reset to defaults,
# if told to do so
6934 if self.op.identify_defaults:
6935 self._RevertToDefaults(cluster)
# NIC buildup
self.nics = []
for idx, nic in enumerate(self.op.nics):
6940 nic_mode_req = nic.get("mode", None)
6941 nic_mode = nic_mode_req
6942 if nic_mode is None:
6943 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6945 # in routed mode, for the first nic, the default ip is 'auto'
6946 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6947 default_ip_mode = constants.VALUE_AUTO
6949 default_ip_mode = constants.VALUE_NONE
6951 # ip validity checks
6952 ip = nic.get("ip", default_ip_mode)
if ip is None or ip.lower() == constants.VALUE_NONE:
  nic_ip = None
6955 elif ip.lower() == constants.VALUE_AUTO:
6956 if not self.op.name_check:
6957 raise errors.OpPrereqError("IP address set to auto but name checks"
6958 " have been skipped. Aborting.",
6960 nic_ip = self.hostname1.ip
6962 if not netutils.IsValidIP4(ip):
6963 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6964 " like a valid IP" % ip,
6968 # TODO: check the ip address for uniqueness
6969 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6970 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6973 # MAC address verification
6974 mac = nic.get("mac", constants.VALUE_AUTO)
6975 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6976 mac = utils.NormalizeAndValidateMac(mac)
try:
  self.cfg.ReserveMAC(mac, self.proc.GetECId())
6980 except errors.ReservationError:
6981 raise errors.OpPrereqError("MAC address %s already in use"
6982 " in cluster" % mac,
6983 errors.ECODE_NOTUNIQUE)
6985 # bridge verification
6986 bridge = nic.get("bridge", None)
6987 link = nic.get("link", None)
6989 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6990 " at the same time", errors.ECODE_INVAL)
6991 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6992 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
nicparams = {}
if nic_mode_req:
  nicparams[constants.NIC_MODE] = nic_mode_req
if link:
  nicparams[constants.NIC_LINK] = link
7003 check_params = cluster.SimpleFillNIC(nicparams)
7004 objects.NIC.CheckParameterSyntax(check_params)
7005 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7007 # disk checks/pre-build
self.disks = []
for disk in self.op.disks:
7010 mode = disk.get("mode", constants.DISK_RDWR)
7011 if mode not in constants.DISK_ACCESS_SET:
7012 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7013 mode, errors.ECODE_INVAL)
7014 size = disk.get("size", None)
7016 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7019 except (TypeError, ValueError):
7020 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7022 new_disk = {"size": size, "mode": mode}
7024 new_disk["adopt"] = disk["adopt"]
7025 self.disks.append(new_disk)
7027 if self.op.mode == constants.INSTANCE_IMPORT:
# Check that the new instance doesn't have fewer disks than the export
7030 instance_disks = len(self.disks)
7031 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7032 if instance_disks < export_disks:
7033 raise errors.OpPrereqError("Not enough disks to import."
7034 " (instance: %d, export: %d)" %
7035 (instance_disks, export_disks),
disk_images = []
for idx in range(export_disks):
7040 option = 'disk%d_dump' % idx
7041 if export_info.has_option(constants.INISECT_INS, option):
7042 # FIXME: are the old os-es, disk sizes, etc. useful?
7043 export_name = export_info.get(constants.INISECT_INS, option)
7044 image = utils.PathJoin(self.op.src_path, export_name)
7045 disk_images.append(image)
else:
  disk_images.append(False)
7049 self.src_images = disk_images
7051 old_name = export_info.get(constants.INISECT_INS, 'name')
try:
  exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7054 except (TypeError, ValueError), err:
7055 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7056 " an integer: %s" % str(err),
7058 if self.op.instance_name == old_name:
7059 for idx, nic in enumerate(self.nics):
7060 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7061 nic_mac_ini = 'nic%d_mac' % idx
7062 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7064 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7066 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7067 if self.op.ip_check:
7068 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7069 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7070 (self.check_ip, self.op.instance_name),
7071 errors.ECODE_NOTUNIQUE)
7073 #### mac address generation
7074 # By generating here the mac address both the allocator and the hooks get
7075 # the real final mac address rather than the 'auto' or 'generate' value.
7076 # There is a race condition between the generation and the instance object
7077 # creation, which means that we know the mac is valid now, but we're not
7078 # sure it will be when we actually add the instance. If things go bad
7079 # adding the instance will abort because of a duplicate mac, and the
7080 # creation job will fail.
7081 for nic in self.nics:
7082 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7083 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7087 if self.op.iallocator is not None:
7088 self._RunAllocator()
7090 #### node related checks
7092 # check primary node
7093 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7094 assert self.pnode is not None, \
7095 "Cannot retrieve locked node %s" % self.op.pnode
7097 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7098 pnode.name, errors.ECODE_STATE)
7100 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7101 pnode.name, errors.ECODE_STATE)
7103 self.secondaries = []
7105 # mirror node verification
7106 if self.op.disk_template in constants.DTS_NET_MIRROR:
7107 if self.op.snode is None:
7108 raise errors.OpPrereqError("The networked disk templates need"
7109 " a mirror node", errors.ECODE_INVAL)
7110 if self.op.snode == pnode.name:
7111 raise errors.OpPrereqError("The secondary node cannot be the"
7112 " primary node.", errors.ECODE_INVAL)
7113 _CheckNodeOnline(self, self.op.snode)
7114 _CheckNodeNotDrained(self, self.op.snode)
7115 self.secondaries.append(self.op.snode)
7117 nodenames = [pnode.name] + self.secondaries
req_size = _ComputeDiskSize(self.op.disk_template,
                            self.disks)
7122 # Check lv size requirements, if not adopting
7123 if req_size is not None and not self.adopt_disks:
7124 _CheckNodesFreeDisk(self, nodenames, req_size)
7126 if self.adopt_disks: # instead, we must check the adoption data
7127 all_lvs = set([i["adopt"] for i in self.disks])
7128 if len(all_lvs) != len(self.disks):
7129 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7131 for lv_name in all_lvs:
try:
  self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7134 except errors.ReservationError:
7135 raise errors.OpPrereqError("LV named %s used by another instance" %
7136 lv_name, errors.ECODE_NOTUNIQUE)
7138 node_lvs = self.rpc.call_lv_list([pnode.name],
7139 self.cfg.GetVGName())[pnode.name]
7140 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7141 node_lvs = node_lvs.payload
7142 delta = all_lvs.difference(node_lvs.keys())
7144 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7145 utils.CommaJoin(delta),
7147 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7149 raise errors.OpPrereqError("Online logical volumes found, cannot"
7150 " adopt: %s" % utils.CommaJoin(online_lvs),
7152 # update the size of disk based on what is found
7153 for dsk in self.disks:
7154 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7156 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7158 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7159 # check OS parameters (remotely)
7160 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7162 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7164 # memory check on primary node
7166 _CheckNodeFreeMemory(self, self.pnode.name,
7167 "creating instance %s" % self.op.instance_name,
7168 self.be_full[constants.BE_MEMORY],
7171 self.dry_run_result = list(nodenames)
7173 def Exec(self, feedback_fn):
7174 """Create and add the instance to the cluster.
7177 instance = self.op.instance_name
7178 pnode_name = self.pnode.name
7180 ht_kind = self.op.hypervisor
7181 if ht_kind in constants.HTS_REQ_PORT:
network_port = self.cfg.AllocatePort()
else:
  network_port = None
7186 if constants.ENABLE_FILE_STORAGE:
7187 # this is needed because os.path.join does not accept None arguments
7188 if self.op.file_storage_dir is None:
7189 string_file_storage_dir = ""
7191 string_file_storage_dir = self.op.file_storage_dir
7193 # build the full file storage dir path
7194 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7195 string_file_storage_dir, instance)
7197 file_storage_dir = ""
7199 disks = _GenerateDiskTemplate(self,
7200 self.op.disk_template,
7201 instance, pnode_name,
self.secondaries,
self.disks,
file_storage_dir,
self.op.file_driver,
0)
7208 iobj = objects.Instance(name=instance, os=self.op.os_type,
7209 primary_node=pnode_name,
7210 nics=self.nics, disks=disks,
7211 disk_template=self.op.disk_template,
admin_up=False,
network_port=network_port,
7214 beparams=self.op.beparams,
7215 hvparams=self.op.hvparams,
7216 hypervisor=self.op.hypervisor,
osparams=self.op.osparams,
)
7220 if self.adopt_disks:
7221 # rename LVs to the newly-generated names; we need to construct
7222 # 'fake' LV disks with the old data, plus the new unique_id
7223 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
rename_to = []
for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7226 rename_to.append(t_dsk.logical_id)
7227 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7228 self.cfg.SetDiskID(t_dsk, pnode_name)
7229 result = self.rpc.call_blockdev_rename(pnode_name,
7230 zip(tmp_disks, rename_to))
7231 result.Raise("Failed to rename adoped LVs")
7233 feedback_fn("* creating instance disks...")
try:
  _CreateDisks(self, iobj)
7236 except errors.OpExecError:
7237 self.LogWarning("Device creation failed, reverting...")
try:
  _RemoveDisks(self, iobj)
finally:
  self.cfg.ReleaseDRBDMinors(instance)
  raise
7244 feedback_fn("adding instance %s to cluster config" % instance)
7246 self.cfg.AddInstance(iobj, self.proc.GetECId())
7248 # Declare that we don't want to remove the instance lock anymore, as we've
7249 # added the instance to the config
7250 del self.remove_locks[locking.LEVEL_INSTANCE]
7251 # Unlock all the nodes
7252 if self.op.mode == constants.INSTANCE_IMPORT:
7253 nodes_keep = [self.op.src_node]
7254 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7255 if node != self.op.src_node]
7256 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7257 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7259 self.context.glm.release(locking.LEVEL_NODE)
7260 del self.acquired_locks[locking.LEVEL_NODE]
7262 if self.op.wait_for_sync:
7263 disk_abort = not _WaitForSync(self, iobj)
7264 elif iobj.disk_template in constants.DTS_NET_MIRROR:
# make sure the disks are not degraded (still syncing is ok)
7267 feedback_fn("* checking mirrors status")
7268 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
else:
  disk_abort = False

if disk_abort:
  _RemoveDisks(self, iobj)
7274 self.cfg.RemoveInstance(iobj.name)
7275 # Make sure the instance lock gets removed
7276 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7277 raise errors.OpExecError("There are some degraded disks for"
7280 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7281 if self.op.mode == constants.INSTANCE_CREATE:
7282 if not self.op.no_install:
7283 feedback_fn("* running the instance OS create scripts...")
7284 # FIXME: pass debug option from opcode to backend
7285 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7286 self.op.debug_level)
7287 result.Raise("Could not add os for instance %s"
7288 " on node %s" % (instance, pnode_name))
7290 elif self.op.mode == constants.INSTANCE_IMPORT:
7291 feedback_fn("* running the instance OS import scripts...")
transfers = []

for idx, image in enumerate(self.src_images):
  if not image:
    continue
7299 # FIXME: pass debug option from opcode to backend
7300 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7301 constants.IEIO_FILE, (image, ),
7302 constants.IEIO_SCRIPT,
7303 (iobj.disks[idx], idx),
None)
transfers.append(dt)
import_result = \
  masterd.instance.TransferInstanceData(self, feedback_fn,
7309 self.op.src_node, pnode_name,
self.pnode.secondary_ip,
iobj, transfers)
7312 if not compat.all(import_result):
7313 self.LogWarning("Some disks for instance %s on node %s were not"
7314 " imported successfully" % (instance, pnode_name))
7316 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7317 feedback_fn("* preparing remote import...")
7318 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7319 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7321 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7322 self.source_x509_ca,
7323 self._cds, timeouts)
7324 if not compat.all(disk_results):
7325 # TODO: Should the instance still be started, even if some disks
7326 # failed to import (valid for local imports, too)?
7327 self.LogWarning("Some disks for instance %s on node %s were not"
7328 " imported successfully" % (instance, pnode_name))
7330 # Run rename script on newly imported instance
7331 assert iobj.name == instance
7332 feedback_fn("Running rename script for %s" % instance)
7333 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7334 self.source_instance_name,
7335 self.op.debug_level)
7337 self.LogWarning("Failed to run rename script for %s on node"
7338 " %s: %s" % (instance, pnode_name, result.fail_msg))
else:
  # also checked in the prereq part
  raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                               % self.op.mode)
if self.op.start:
  iobj.admin_up = True
7347 self.cfg.Update(iobj, feedback_fn)
7348 logging.info("Starting instance %s on node %s", instance, pnode_name)
7349 feedback_fn("* starting instance...")
7350 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7351 result.Raise("Could not start instance")
7353 return list(iobj.all_nodes)
7356 class LUConnectConsole(NoHooksLU):
7357 """Connect to an instance's console.
7359 This is somewhat special in that it returns the command line that
you need to run on the master node in order to connect to the console.
7369 def ExpandNames(self):
7370 self._ExpandAndLockInstance()
7372 def CheckPrereq(self):
7373 """Check prerequisites.
7375 This checks that the instance is in the cluster.
7378 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7379 assert self.instance is not None, \
7380 "Cannot retrieve locked instance %s" % self.op.instance_name
7381 _CheckNodeOnline(self, self.instance.primary_node)
7383 def Exec(self, feedback_fn):
7384 """Connect to the console of an instance
7387 instance = self.instance
7388 node = instance.primary_node
7390 node_insts = self.rpc.call_instance_list([node],
7391 [instance.hypervisor])[node]
7392 node_insts.Raise("Can't get node information from %s" % node)
7394 if instance.name not in node_insts.payload:
7395 raise errors.OpExecError("Instance %s is not running." % instance.name)
7397 logging.debug("Connecting to console of %s on %s", instance.name, node)
7399 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7400 cluster = self.cfg.GetClusterInfo()
7401 # beparams and hvparams are passed separately, to avoid editing the
7402 # instance and then saving the defaults in the instance itself.
7403 hvparams = cluster.FillHV(instance)
7404 beparams = cluster.FillBE(instance)
7405 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7408 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7411 class LUReplaceDisks(LogicalUnit):
7412 """Replace the disks of an instance.
7415 HPATH = "mirrors-replace"
7416 HTYPE = constants.HTYPE_INSTANCE
7419 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7420 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7421 ("remote_node", None, _TMaybeString),
7422 ("iallocator", None, _TMaybeString),
7423 ("early_release", False, _TBool),
7427 def CheckArguments(self):
TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                              self.op.iallocator)
7431 def ExpandNames(self):
7432 self._ExpandAndLockInstance()
7434 if self.op.iallocator is not None:
7435 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7437 elif self.op.remote_node is not None:
7438 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7439 self.op.remote_node = remote_node
7441 # Warning: do not remove the locking of the new secondary here
7442 # unless DRBD8.AddChildren is changed to work in parallel;
7443 # currently it doesn't since parallel invocations of
7444 # FindUnusedMinor will conflict
7445 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7446 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
else:
  self.needed_locks[locking.LEVEL_NODE] = []
7450 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7452 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7453 self.op.iallocator, self.op.remote_node,
7454 self.op.disks, False, self.op.early_release)
7456 self.tasklets = [self.replacer]
7458 def DeclareLocks(self, level):
7459 # If we're not already locking all nodes in the set we have to declare the
7460 # instance's primary/secondary nodes.
7461 if (level == locking.LEVEL_NODE and
7462 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7463 self._LockInstancesNodes()
7465 def BuildHooksEnv(self):
7468 This runs on the master, the primary and all the secondaries.
7471 instance = self.replacer.instance
7473 "MODE": self.op.mode,
7474 "NEW_SECONDARY": self.op.remote_node,
7475 "OLD_SECONDARY": instance.secondary_nodes[0],
7477 env.update(_BuildInstanceHookEnvByObject(self, instance))
nl = [
  self.cfg.GetMasterNode(),
  instance.primary_node,
  ]
if self.op.remote_node is not None:
  nl.append(self.op.remote_node)
return env, nl, nl
7487 class TLReplaceDisks(Tasklet):
7488 """Replaces disks for an instance.
7490 Note: Locking is not within the scope of this class.
7493 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7494 disks, delay_iallocator, early_release):
7495 """Initializes this class.
7498 Tasklet.__init__(self, lu)
7501 self.instance_name = instance_name
self.mode = mode
self.iallocator_name = iallocator_name
7504 self.remote_node = remote_node
self.disks = disks
self.delay_iallocator = delay_iallocator
7507 self.early_release = early_release
7510 self.instance = None
7511 self.new_node = None
7512 self.target_node = None
7513 self.other_node = None
7514 self.remote_node_info = None
7515 self.node_secondary_ip = None
@staticmethod
def CheckArguments(mode, remote_node, iallocator):
7519 """Helper function for users of this class.
7522 # check for valid parameter combination
7523 if mode == constants.REPLACE_DISK_CHG:
7524 if remote_node is None and iallocator is None:
7525 raise errors.OpPrereqError("When changing the secondary either an"
7526 " iallocator script must be used or the"
7527 " new node given", errors.ECODE_INVAL)
7529 if remote_node is not None and iallocator is not None:
7530 raise errors.OpPrereqError("Give either the iallocator or the new"
7531 " secondary, not both", errors.ECODE_INVAL)
7533 elif remote_node is not None or iallocator is not None:
7534 # Not replacing the secondary
7535 raise errors.OpPrereqError("The iallocator and new node options can"
7536 " only be used when changing the"
7537 " secondary node", errors.ECODE_INVAL)
@staticmethod
def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7541 """Compute a new secondary node using an IAllocator.
7544 ial = IAllocator(lu.cfg, lu.rpc,
7545 mode=constants.IALLOCATOR_MODE_RELOC,
name=instance_name,
relocate_from=relocate_from)
7549 ial.Run(iallocator_name)
7552 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7553 " %s" % (iallocator_name, ial.info),
7556 if len(ial.result) != ial.required_nodes:
7557 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7558 " of nodes (%s), required %s" %
(iallocator_name,
 len(ial.result), ial.required_nodes),
errors.ECODE_FAULT)
7563 remote_node_name = ial.result[0]
7565 lu.LogInfo("Selected new secondary for instance '%s': %s",
7566 instance_name, remote_node_name)
7568 return remote_node_name
7570 def _FindFaultyDisks(self, node_name):
return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                node_name, True)
7574 def CheckPrereq(self):
7575 """Check prerequisites.
7577 This checks that the instance is in the cluster.
7580 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7581 assert instance is not None, \
7582 "Cannot retrieve locked instance %s" % self.instance_name
7584 if instance.disk_template != constants.DT_DRBD8:
7585 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7586 " instances", errors.ECODE_INVAL)
7588 if len(instance.secondary_nodes) != 1:
7589 raise errors.OpPrereqError("The instance has a strange layout,"
7590 " expected one secondary but found %d" %
7591 len(instance.secondary_nodes),
7594 if not self.delay_iallocator:
7595 self._CheckPrereq2()
7597 def _CheckPrereq2(self):
7598 """Check prerequisites, second part.
7600 This function should always be part of CheckPrereq. It was separated and is
7601 now called from Exec because during node evacuation iallocator was only
called with an unmodified cluster model, not taking planned changes into
account.
7606 instance = self.instance
7607 secondary_node = instance.secondary_nodes[0]
7609 if self.iallocator_name is None:
7610 remote_node = self.remote_node
else:
  remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7613 instance.name, instance.secondary_nodes)
7615 if remote_node is not None:
7616 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7617 assert self.remote_node_info is not None, \
7618 "Cannot retrieve locked node %s" % remote_node
else:
  self.remote_node_info = None
7622 if remote_node == self.instance.primary_node:
7623 raise errors.OpPrereqError("The specified node is the primary node of"
7624 " the instance.", errors.ECODE_INVAL)
7626 if remote_node == secondary_node:
7627 raise errors.OpPrereqError("The specified node is already the"
7628 " secondary node of the instance.",
7631 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7632 constants.REPLACE_DISK_CHG):
7633 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7636 if self.mode == constants.REPLACE_DISK_AUTO:
7637 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7638 faulty_secondary = self._FindFaultyDisks(secondary_node)
7640 if faulty_primary and faulty_secondary:
7641 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7642 " one node and can not be repaired"
7643 " automatically" % self.instance_name,
if faulty_primary:
  self.disks = faulty_primary
7648 self.target_node = instance.primary_node
7649 self.other_node = secondary_node
7650 check_nodes = [self.target_node, self.other_node]
7651 elif faulty_secondary:
7652 self.disks = faulty_secondary
7653 self.target_node = secondary_node
7654 self.other_node = instance.primary_node
7655 check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
7662 if self.mode == constants.REPLACE_DISK_PRI:
7663 self.target_node = instance.primary_node
7664 self.other_node = secondary_node
7665 check_nodes = [self.target_node, self.other_node]
7667 elif self.mode == constants.REPLACE_DISK_SEC:
7668 self.target_node = secondary_node
7669 self.other_node = instance.primary_node
7670 check_nodes = [self.target_node, self.other_node]
7672 elif self.mode == constants.REPLACE_DISK_CHG:
7673 self.new_node = remote_node
7674 self.other_node = instance.primary_node
7675 self.target_node = secondary_node
7676 check_nodes = [self.new_node, self.other_node]
7678 _CheckNodeNotDrained(self.lu, remote_node)
7680 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7681 assert old_node_info is not None
7682 if old_node_info.offline and not self.early_release:
7683 # doesn't make sense to delay the release
7684 self.early_release = True
7685 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7686 " early-release mode", secondary_node)
    else:
      raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                   self.mode)
    # If not specified, all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))
7696 for node in check_nodes:
7697 _CheckNodeOnline(self.lu, node)
7699 # Check whether disks are valid
7700 for disk_idx in self.disks:
7701 instance.FindDisk(disk_idx)
    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
7707 if node_name is not None:
7708 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7710 self.node_secondary_ip = node_2nd_ip
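
    # Example mapping (hypothetical nodes and addresses):
    #   {"node1": "192.0.2.10", "node3": "192.0.2.30"}
    # These secondary IPs are the addresses DRBD uses for replication traffic.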
7712 def Exec(self, feedback_fn):
7713 """Execute disk replacement.
    This dispatches the disk replacement to the appropriate handler.

    """
7718 if self.delay_iallocator:
7719 self._CheckPrereq2()
    if not self.disks:
      feedback_fn("No disks need replacement")
      return
7725 feedback_fn("Replacing disk(s) %s for %s" %
7726 (utils.CommaJoin(self.disks), self.instance.name))
7728 activate_disks = (not self.instance.admin_up)
7730 # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)
    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7749 def _CheckVolumeGroup(self, nodes):
7750 self.lu.LogInfo("Checking volume groups")
7752 vgname = self.cfg.GetVGName()
7754 # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))
7766 def _CheckDisksExistence(self, nodes):
7767 # Check disk existence
7768 for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))
7785 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7786 for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
7795 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7796 " replace disks for instance %s" %
7797 (node_name, self.instance.name))
7799 def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()

    iv_names = {}
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue
7807 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7809 self.cfg.SetDiskID(dev, node_name)
7811 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7812 names = _GenerateUniqueNames(self.lu, lv_names)
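
      # As an illustration (hypothetical names): for disk/0 the suffixes are
      # ".disk0_data" and ".disk0_meta"; _GenerateUniqueNames prefixes each
      # with a freshly generated unique ID, so the new LVs cannot clash with
      # the LVs they are about to replace.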
7814 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7815 logical_id=(vgname, names[0]))
7816 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7817 logical_id=(vgname, names[1]))
7819 new_lvs = [lv_data, lv_meta]
7820 old_lvs = dev.children
7821 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7823 # we pass force_create=True to force the LVM creation
7824 for new_lv in new_lvs:
7825 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names
7830 def _CheckDevices(self, node_name, iv_names):
7831 for name, (dev, _, _) in iv_names.iteritems():
7832 self.cfg.SetDiskID(dev, node_name)
7834 result = self.rpc.call_blockdev_find(node_name, dev)
7836 msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))
7843 if result.payload.is_degraded:
7844 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7846 def _RemoveOldStorage(self, node_name, iv_names):
7847 for name, (_, old_lvs, _) in iv_names.iteritems():
7848 self.lu.LogInfo("Remove logical volumes for %s" % name)
      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
7858 def _ReleaseNodeLock(self, node_name):
7859 """Releases the lock for a given node."""
7860 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7862 def _ExecDrbd8DiskOnly(self, feedback_fn):
7863 """Replace a disk on the primary or secondary for DRBD 8.
7865 The algorithm for replace is quite complicated:
7867 1. for each disk to be replaced:
7869 1. create new LVs on the target node with unique names
7870 1. detach old LVs from the drbd device
7871 1. rename old LVs to name_replaced.<time_t>
7872 1. rename new LVs to old LVs
7873 1. attach the new LVs (with the old names now) to the drbd device
7875 1. wait for sync across all devices
7877 1. for each modified disk:
        1. remove old LVs (which have the name name_replaced.<time_t>)
    Failures are not very well handled.

    """
    steps_total = 6

7886 # Step: check device activation
7887 self.lu.LogStep(1, steps_total, "Check device existence")
7888 self._CheckDisksExistence([self.other_node, self.target_node])
7889 self._CheckVolumeGroup([self.target_node, self.other_node])
7891 # Step: check other node consistency
7892 self.lu.LogStep(2, steps_total, "Check peer consistency")
7893 self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)
7897 # Step: create new storage
7898 self.lu.LogStep(3, steps_total, "Allocate new storage")
7899 iv_names = self._CreateNewStorage(self.target_node)
7901 # Step: for each lv, detach+rename*2+attach
7902 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7903 for dev, old_lvs, new_lvs in iv_names.itervalues():
7904 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
7908 result.Raise("Can't detach drbd from local storage on node"
7909 " %s for device %s" % (self.target_node, dev.iv_name))
7911 #cfg.Update(instance)
7913 # ok, we created the new LVs, so now we know we have the needed
7914 # storage; as such, we proceed on the target node to rename
7915 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7916 # using the assumption that logical_id == physical_id (which in
7917 # turn is the unique_id on that node)
7919 # FIXME(iustin): use a better name for the replaced LVs
7920 temp_suffix = int(time.time())
7921 ren_fn = lambda d, suff: (d.physical_id[0],
7922 d.physical_id[1] + "_replaced-%s" % suff)
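
      # Illustration of the scheme (hypothetical names): with
      # temp_suffix=1234567890 an LV ("xenvg", ".disk0_data") is renamed to
      # ("xenvg", ".disk0_data_replaced-1234567890"), freeing the old name
      # for the replacement LV.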
7924 # Build the rename list based on what LVs exist on the node
7925 rename_old_to_new = []
7926 for to_ren in old_lvs:
7927 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7928 if not result.fail_msg and result.payload:
7930 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7932 self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
7935 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7937 # Now we rename the new LVs to the old LVs
7938 self.lu.LogInfo("Renaming the new LVs on the target node")
7939 rename_new_to_old = [(new, old.physical_id)
7940 for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
7943 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7945 for old, new in zip(old_lvs, new_lvs):
7946 new.logical_id = old.logical_id
7947 self.cfg.SetDiskID(new, self.target_node)
7949 for disk in old_lvs:
7950 disk.logical_id = ren_fn(disk, temp_suffix)
7951 self.cfg.SetDiskID(disk, self.target_node)
7953 # Now that the new lvs have the old name, we can add them to the device
7954 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7968 dev.children = new_lvs
7970 self.cfg.Update(self.instance, feedback_fn)
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
7977 # WARNING: we release both node locks here, do not do other RPCs
7978 # than WaitForSync to the primary node
7979 self._ReleaseNodeLock([self.target_node, self.other_node])
7982 # This can fail as the old devices are degraded and _WaitForSync
7983 # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)
7988 # Check all devices manually
7989 self._CheckDevices(self.instance.primary_node, iv_names)
7991 # Step: remove old storage
7992 if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
7997 def _ExecDrbd8Secondary(self, feedback_fn):
7998 """Replace the secondary node for DRBD 8.
8000 The algorithm for replace is quite complicated:
8001 - for all disks of the instance:
8002 - create new LVs on the new node with same names
8003 - shutdown the drbd device on the old secondary
8004 - disconnect the drbd network on the primary
8005 - create the drbd device on the new secondary
8006 - network attach the drbd on the primary, using an artifice:
8007 the drbd code for Attach() will connect to the network if it
        finds a device which is connected to the good local disks but
        not network enabled
8010 - wait for sync across all devices
8011 - remove all disks from the old secondary
    Failures are not very well handled.

    """
    steps_total = 6

8018 # Step: check device activation
8019 self.lu.LogStep(1, steps_total, "Check device existence")
8020 self._CheckDisksExistence([self.instance.primary_node])
8021 self._CheckVolumeGroup([self.instance.primary_node])
8023 # Step: check other node consistency
8024 self.lu.LogStep(2, steps_total, "Check peer consistency")
8025 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8027 # Step: create new storage
8028 self.lu.LogStep(3, steps_total, "Allocate new storage")
8029 for idx, dev in enumerate(self.instance.disks):
8030 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8031 (self.new_node, idx))
8032 # we pass force_create=True to force LVM creation
8033 for new_lv in dev.children:
8034 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8035 _GetInstanceInfoText(self.instance), False)
    # Step 4: drbd minors and drbd setup changes
8038 # after this, we must manually remove the drbd minors on both the
8039 # error and the success paths
8040 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8041 minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
8044 logging.debug("Allocated minors %r", minors)
    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8048 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8049 (self.new_node, idx))
8050 # create new devices on new_node; note that we create two IDs:
8051 # one without port, so the drbd will be activated without
8052 # networking information on the new node at this stage, and one
8053 # with network, for the latter activation in step 4
8054 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2
8061 new_alone_id = (self.instance.primary_node, self.new_node, None,
8062 p_minor, new_minor, o_secret)
8063 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8064 p_minor, new_minor, o_secret)
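
      # For illustration (hypothetical values): with port 11000, primary
      # minor 0 and new minor 3, the two IDs differ only in the port slot:
      #   new_alone_id = (pri_node, new_node, None,  0, 3, secret)
      #   new_net_id   = (pri_node, new_node, 11000, 0, 3, secret)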
8066 iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8075 _GetInstanceInfoText(self.instance), False)
8076 except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise
8080 # We have new devices, shutdown the drbd on the old secondary
8081 for idx, dev in enumerate(self.instance.disks):
8082 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8083 self.cfg.SetDiskID(dev, self.target_node)
8084 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
8088 hint=("Please cleanup this device manually as"
8089 " soon as possible"))
8091 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8092 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8093 self.node_secondary_ip,
8094 self.instance.disks)\
8095 [self.instance.primary_node]
8097 msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
8100 self.cfg.ReleaseDRBDMinors(self.instance.name)
8101 raise errors.OpExecError("Can't detach the disks from the network on"
8102 " old node: %s" % (msg,))
8104 # if we managed to detach at least one, we update all the disks of
8105 # the instance to point to the new secondary
8106 self.lu.LogInfo("Updating instance configuration")
8107 for dev, _, new_logical_id in iv_names.itervalues():
8108 dev.logical_id = new_logical_id
8109 self.cfg.SetDiskID(dev, self.instance.primary_node)
8111 self.cfg.Update(self.instance, feedback_fn)
8113 # and now perform the drbd attach
8114 self.lu.LogInfo("Attaching primary drbds to new secondary"
8115 " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
8123 msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
8128 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
8134 # WARNING: we release all node locks here, do not do other RPCs
8135 # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])
8141 # This can fail as the old devices are degraded and _WaitForSync
8142 # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)
8147 # Check all devices manually
8148 self._CheckDevices(self.instance.primary_node, iv_names)
8150 # Step: remove old storage
8151 if not self.early_release:
8152 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8153 self._RemoveOldStorage(self.target_node, iv_names)
8156 class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_PARAMS = [
    ("node_name", _NoDefault, _TNonEmptyString),
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("ignore_consistency", False, _TBool),
    ]
  REQ_BGL = False
8168 def CheckArguments(self):
8169 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8171 storage_type = self.op.storage_type
8173 if (constants.SO_FIX_CONSISTENCY not in
8174 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8175 raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)
8179 def ExpandNames(self):
8180 self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }
8184 def _CheckFaultyDisks(self, instance, node_name):
8185 """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise
8198 def CheckPrereq(self):
    """Check prerequisites.

    """
8202 # Check whether any instance on this node has faulty disks
8203 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
8206 check_nodes = set(inst.all_nodes)
8207 check_nodes.discard(self.op.node_name)
8208 for inst_node_name in check_nodes:
8209 self._CheckFaultyDisks(inst, inst_node_name)
8211 def Exec(self, feedback_fn):
8212 feedback_fn("Repairing storage unit '%s' on %s ..." %
8213 (self.op.name, self.op.node_name))
8215 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8216 result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
8219 constants.SO_FIX_CONSISTENCY)
8220 result.Raise("Failed to repair storage unit '%s' on %s" %
8221 (self.op.name, self.op.node_name))
8224 class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_PARAMS = [
    ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
    ("remote_node", None, _TMaybeString),
    ("iallocator", None, _TMaybeString),
    ]
  REQ_BGL = False
8235 def CheckArguments(self):
8236 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8238 def ExpandNames(self):
8239 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8240 self.needed_locks = locks = {}
8241 if self.op.remote_node is None:
8242 locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8245 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8247 def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result
8273 class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "grow-disk"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disk", _NoDefault, _TInt),
    ("amount", _NoDefault, _TInt),
    ("wait_for_sync", True, _TBool),
    ]
  REQ_BGL = False
8287 def ExpandNames(self):
8288 self._ExpandAndLockInstance()
8289 self.needed_locks[locking.LEVEL_NODE] = []
8290 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8292 def DeclareLocks(self, level):
8293 if level == locking.LEVEL_NODE:
8294 self._LockInstancesNodes()
8296 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
8310 def CheckPrereq(self):
8311 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
8316 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8317 assert instance is not None, \
8318 "Cannot retrieve locked instance %s" % self.op.instance_name
8319 nodenames = list(instance.all_nodes)
8320 for node in nodenames:
8321 _CheckNodeOnline(self, node)
8323 self.instance = instance
8325 if instance.disk_template not in constants.DTS_GROWABLE:
8326 raise errors.OpPrereqError("Instance's disk layout does not support"
8327 " growing.", errors.ECODE_INVAL)
8329 self.disk = instance.FindDisk(self.op.disk)
8331 if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
8334 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8336 def Exec(self, feedback_fn):
    """Execute disk grow.

    """
8340 instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")
8347 for node in instance.all_nodes:
8348 self.cfg.SetDiskID(disk, node)
8349 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8350 result.Raise("Grow request failed to node %s" % node)
8352 # TODO: Rewrite code to work properly
8353 # DRBD goes into sync mode for a short amount of time after executing the
8354 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8355 # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    time.sleep(5)

8359 disk.RecordGrow(self.op.amount)
8360 self.cfg.Update(instance, feedback_fn)
8361 if self.op.wait_for_sync:
8362 disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8365 " status.\nPlease check the instance.")
8366 if not instance.admin_up:
8367 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8368 elif not instance.admin_up:
8369 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8370 " not supposed to be running because no wait for"
8371 " sync mode was requested.")
8374 class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_PARAMS = [
    ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
    ("static", False, _TBool),
    ]
  REQ_BGL = False
8384 def ExpandNames(self):
8385 self.needed_locks = {}
8386 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8388 if self.op.instances:
8389 self.wanted_names = []
8390 for name in self.op.instances:
8391 full_name = _ExpandInstanceName(self.cfg, name)
8392 self.wanted_names.append(full_name)
8393 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
8396 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8398 self.needed_locks[locking.LEVEL_NODE] = []
8399 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8401 def DeclareLocks(self, level):
8402 if level == locking.LEVEL_NODE:
8403 self._LockInstancesNodes()
8405 def CheckPrereq(self):
8406 """Check prerequisites.
    This only checks the optional instance list against the existing names.

    """
8411 if self.wanted_names is None:
8412 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8414 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8415 in self.wanted_names]
8417 def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None
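
    # Illustrative payload (hypothetical values): a healthy DRBD device may
    # report dev_path "/dev/drbd0", major/minor 147/0, sync_percent 99.9, a
    # few seconds of estimated_time, is_degraded False and an "ok" ldisk
    # status; those fields make up the tuple returned below.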
8436 return (status.dev_path, status.major, status.minor,
8437 status.sync_percent, status.estimated_time,
8438 status.is_degraded, status.ldisk_status)
8440 def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data
8475 def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"
8499 disks = [self._ComputeDiskStatus(instance, None, device)
8500 for device in instance.disks]
      idict = {
        "name": instance.name,
8504 "config_state": config_state,
8505 "run_state": remote_state,
8506 "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
8514 "network_port": instance.network_port,
8515 "hv_instance": instance.hvparams,
8516 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8517 "be_instance": instance.beparams,
8518 "be_actual": cluster.FillBE(instance),
8519 "os_instance": instance.osparams,
8520 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8521 "serial_no": instance.serial_no,
8522 "mtime": instance.mtime,
8523 "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result
8532 class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("nics", _EmptyList, _TList),
8541 ("disks", _EmptyList, _TList),
8542 ("beparams", _EmptyDict, _TDict),
8543 ("hvparams", _EmptyDict, _TDict),
8544 ("disk_template", None, _TMaybeString),
8545 ("remote_node", None, _TMaybeString),
8546 ("os_name", None, _TMaybeString),
8547 ("force_variant", False, _TBool),
    ("osparams", None, _TOr(_TDict, _TNone)),
    _PForce,
    ]
  REQ_BGL = False
8553 def CheckArguments(self):
8554 if not (self.op.nics or self.op.disks or self.op.disk_template or
8555 self.op.hvparams or self.op.beparams or self.op.os_name):
8556 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8558 if self.op.hvparams:
8559 _CheckGlobalHvParams(self.op.hvparams)
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
8564 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
8572 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8573 if not isinstance(disk_dict, dict):
8574 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8575 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8577 if disk_op == constants.DDM_ADD:
8578 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8579 if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
8582 size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
8589 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8590 str(err), errors.ECODE_INVAL)
8591 disk_dict['size'] = size
      else:
        # modification of disk
8594 if 'size' in disk_dict:
8595 raise errors.OpPrereqError("Disk size change not possible, use"
8596 " grow-disk", errors.ECODE_INVAL)
8598 if disk_addremove > 1:
8599 raise errors.OpPrereqError("Only one disk add or remove operation"
8600 " supported at a time", errors.ECODE_INVAL)
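
    # For reference, self.op.disks is a list of (operation, parameters)
    # pairs; e.g. (hypothetical values) [(constants.DDM_ADD, {"size": 1024})]
    # adds a 1024 MB disk, while [(0, {"mode": "rw"})] modifies disk index 0.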
8602 if self.op.disks and self.op.disk_template is not None:
8603 raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)
8607 if self.op.disk_template:
8608 _CheckDiskTemplate(self.op.disk_template)
8609 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8610 self.op.remote_node is None):
8611 raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
8618 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
8626 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8627 if not isinstance(nic_dict, dict):
8628 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8629 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8631 # nic_dict should be a dict
8632 nic_ip = nic_dict.get('ip', None)
8633 if nic_ip is not None:
8634 if nic_ip.lower() == constants.VALUE_NONE:
8635 nic_dict['ip'] = None
        else:
          if not netutils.IsValidIP4(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)
8641 nic_bridge = nic_dict.get('bridge', None)
8642 nic_link = nic_dict.get('link', None)
8643 if nic_bridge and nic_link:
8644 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8645 " at the same time", errors.ECODE_INVAL)
8646 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8647 nic_dict['bridge'] = None
8648 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8649 nic_dict['link'] = None
8651 if nic_op == constants.DDM_ADD:
8652 nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO
8656 if 'mac' in nic_dict:
8657 nic_mac = nic_dict['mac']
8658 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8659 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8661 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8662 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)
8666 if nic_addremove > 1:
8667 raise errors.OpPrereqError("Only one NIC add or remove operation"
8668 " supported at a time", errors.ECODE_INVAL)
8670 def ExpandNames(self):
8671 self._ExpandAndLockInstance()
8672 self.needed_locks[locking.LEVEL_NODE] = []
8673 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8675 def DeclareLocks(self, level):
8676 if level == locking.LEVEL_NODE:
8677 self._LockInstancesNodes()
8678 if self.op.disk_template and self.op.remote_node:
8679 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8680 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8682 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
8689 if constants.BE_MEMORY in self.be_new:
8690 args['memory'] = self.be_new[constants.BE_MEMORY]
8691 if constants.BE_VCPUS in self.be_new:
8692 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8693 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8694 # information at all.
    args['nics'] = []
    nic_override = dict(self.op.nics)
    for idx, nic in enumerate(self.instance.nics):
      if idx in nic_override:
        this_nic_override = nic_override[idx]
      else:
        this_nic_override = {}
      if 'ip' in this_nic_override:
        ip = this_nic_override['ip']
      else:
        ip = nic.ip
      if 'mac' in this_nic_override:
        mac = this_nic_override['mac']
      else:
        mac = nic.mac
      if idx in self.nic_pnew:
        nicparams = self.nic_pnew[idx]
      else:
        nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
      mode = nicparams[constants.NIC_MODE]
      link = nicparams[constants.NIC_LINK]
      args['nics'].append((ip, mac, mode, link))
    if constants.DDM_ADD in nic_override:
      ip = nic_override[constants.DDM_ADD].get('ip', None)
      mac = nic_override[constants.DDM_ADD]['mac']
      nicparams = self.nic_pnew[constants.DDM_ADD]
      mode = nicparams[constants.NIC_MODE]
      link = nicparams[constants.NIC_LINK]
      args['nics'].append((ip, mac, mode, link))
    elif constants.DDM_REMOVE in nic_override:
      del args['nics'][-1]
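
    # Each args['nics'] entry is an (ip, mac, mode, link) tuple, e.g.
    # (hypothetical): ("198.51.100.10", "aa:00:00:12:34:56", "bridged",
    # "xen-br0"), matching the format the instance hooks consume.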
8728 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8729 if self.op.disk_template:
8730 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl
8734 def CheckPrereq(self):
8735 """Check prerequisites.
    This only checks the instance list against the existing names.

    """
8740 # checking the new params on the primary/secondary nodes
8742 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8743 cluster = self.cluster = self.cfg.GetClusterInfo()
8744 assert self.instance is not None, \
8745 "Cannot retrieve locked instance %s" % self.op.instance_name
8746 pnode = instance.primary_node
8747 nodelist = list(instance.all_nodes)
8750 if self.op.os_name and not self.op.force:
8751 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8752 self.op.force_variant)
8753 instance_os = self.op.os_name
    else:
      instance_os = instance.os
8757 if self.op.disk_template:
8758 if instance.disk_template == self.op.disk_template:
8759 raise errors.OpPrereqError("Instance already has disk template %s" %
8760 instance.disk_template, errors.ECODE_INVAL)
8762 if (instance.disk_template,
8763 self.op.disk_template) not in self._DISK_CONVERSIONS:
8764 raise errors.OpPrereqError("Unsupported disk template conversion from"
8765 " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
8768 _CheckInstanceDown(self, instance, "cannot change disk template")
8769 if self.op.disk_template in constants.DTS_NET_MIRROR:
8770 _CheckNodeOnline(self, self.op.remote_node)
8771 _CheckNodeNotDrained(self, self.op.remote_node)
8772 disks = [{"size": d.size} for d in instance.disks]
8773 required = _ComputeDiskSize(self.op.disk_template, disks)
8774 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8776 # hvparams processing
8777 if self.op.hvparams:
8778 hv_type = instance.hypervisor
8779 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8780 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8781 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8784 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8785 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8786 self.hv_new = hv_new # the new actual values
8787 self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}
8791 # beparams processing
8792 if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
8795 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8796 be_new = cluster.SimpleFillBE(i_bedict)
8797 self.be_new = be_new # the new actual values
8798 self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
8802 # osparams processing
8803 if self.op.osparams:
8804 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8805 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8806 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8807 self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_new = self.os_inst = {}

    self.warn = []

8813 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8814 mem_check_list = [pnode]
8815 if be_new[constants.BE_AUTO_BALANCE]:
8816 # either we changed auto_balance to yes or it was from before
8817 mem_check_list.extend(instance.secondary_nodes)
8818 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8819 instance.hypervisor)
8820 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8821 instance.hypervisor)
8822 pninfo = nodeinfo[pnode]
8823 msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
8828 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8829 self.warn.append("Node data from primary node %s doesn't contain"
8830 " free memory information" % pnode)
8831 elif instance_info.fail_msg:
8832 self.warn.append("Can't get instance runtime information: %s" %
8833 instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8843 pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
8846 " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)
8850 if be_new[constants.BE_AUTO_BALANCE]:
8851 for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
8858 elif not isinstance(nres.payload.get('memory_free', None), int):
8859 self.warn.append("Secondary node %s didn't return free"
8860 " memory information" % node)
8861 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8862 self.warn.append("Not enough memory to failover instance to"
8863 " secondary node %s" % node)
    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
8869 if nic_op == constants.DDM_REMOVE:
8870 if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
8876 if not instance.nics:
8877 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %s" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
8885 old_nic_params = instance.nics[nic_op].nicparams
8886 old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = cluster.SimpleFillNIC({})
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
8892 for key in constants.NICS_PARAMETERS
8893 if key in nic_dict])
8895 if 'bridge' in nic_dict:
8896 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
8900 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8901 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8902 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8903 self.nic_pinst[nic_op] = new_nic_params
8904 self.nic_pnew[nic_op] = new_filled_nic_params
8905 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8907 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8908 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8909 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8916 if new_nic_mode == constants.NIC_MODE_ROUTED:
8917 if 'ip' in nic_dict:
8918 nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
8923 ' on a routed nic', errors.ECODE_INVAL)
8924 if 'mac' in nic_dict:
8925 nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
8929 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8930 # otherwise generate the mac
8931 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8936 except errors.ReservationError:
8937 raise errors.OpPrereqError("MAC address %s already in use"
8938 " in cluster" % nic_mac,
8939 errors.ECODE_NOTUNIQUE)
8942 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8943 raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
8946 for disk_op, _ in self.op.disks:
8947 if disk_op == constants.DDM_REMOVE:
8948 if len(instance.disks) == 1:
8949 raise errors.OpPrereqError("Cannot remove the last disk of"
8950 " an instance", errors.ECODE_INVAL)
8951 _CheckInstanceDown(self, instance, "cannot remove disks")
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
8958 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %s" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)
8968 def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
8972 feedback_fn("Converting template to drbd")
8973 instance = self.instance
8974 pnode = instance.primary_node
8975 snode = self.op.remote_node
8977 # create a fake disk info for _GenerateDiskTemplate
8978 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8979 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8980 instance.name, pnode, [snode],
8981 disk_info, None, None, 0)
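
    # Sketch of the fake disk info (hypothetical sizes): for two disks of
    # 1024 and 4096 MB it would be
    #   [{"size": 1024, "mode": "rw"}, {"size": 4096, "mode": "rw"}]
    # which _GenerateDiskTemplate expands into DRBD8 disks whose children
    # are the data/meta LVs created below.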
8982 info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
8984 # first, create the missing data and meta devices
8985 for disk in new_disks:
8986 # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
8989 for child in disk.children:
8990 _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
8993 feedback_fn("Renaming original volumes...")
8994 rename_list = [(o, n.children[0].logical_id)
8995 for (o, n) in zip(instance.disks, new_disks)]
8996 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8997 result.Raise("Failed to rename original LVs")
8999 feedback_fn("Initializing DRBD devices...")
9000 # all child devices are in place, we can now create the DRBD devices
9001 for disk in new_disks:
9002 for node in [pnode, snode]:
9003 f_create = node == pnode
9004 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9006 # at this point, the instance has been modified
9007 instance.disk_template = constants.DT_DRBD8
9008 instance.disks = new_disks
9009 self.cfg.Update(instance, feedback_fn)
9011 # disks are created, waiting for sync
9012 disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
9015 " this instance, please cleanup manually")
9017 def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
9021 instance = self.instance
9022 assert len(instance.secondary_nodes) == 1
9023 pnode = instance.primary_node
9024 snode = instance.secondary_nodes[0]
9025 feedback_fn("Converting template to plain")
9027 old_disks = instance.disks
9028 new_disks = [d.children[0] for d in old_disks]
9030 # copy over size and mode
9031 for parent, child in zip(old_disks, new_disks):
9032 child.size = parent.size
9033 child.mode = parent.mode
9035 # update instance structure
9036 instance.disks = new_disks
9037 instance.disk_template = constants.DT_PLAIN
9038 self.cfg.Update(instance, feedback_fn)
9040 feedback_fn("Removing volumes on the secondary node...")
9041 for disk in old_disks:
9042 self.cfg.SetDiskID(disk, snode)
9043 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
9046 " continuing anyway: %s", disk.iv_name, snode, msg)
9048 feedback_fn("Removing unneeded volumes on the primary node...")
9049 for idx, disk in enumerate(old_disks):
9050 meta = disk.children[1]
9051 self.cfg.SetDiskID(meta, pnode)
9052 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9055 " continuing anyway: %s", idx, pnode, msg)
9058 def Exec(self, feedback_fn):
9059 """Modifies an instance.
    All parameters take effect only at the next restart of the instance.

    """
9064 # Process here the warnings from CheckPrereq, as we don't have a
9065 # feedback_fn there.
9066 for warn in self.warn:
9067 feedback_fn("WARNING: %s" % warn)
    result = []
    instance = self.instance
9072 for disk_op, disk_dict in self.op.disks:
9073 if disk_op == constants.DDM_REMOVE:
9074 # remove the last disk
9075 device = instance.disks.pop()
9076 device_idx = len(instance.disks)
9077 for node, disk in device.ComputeNodeTree(instance.primary_node):
9078 self.cfg.SetDiskID(disk, node)
9079 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9082 " continuing anyway", device_idx, node, msg)
9083 result.append(("disk/%d" % device_idx, "remove"))
9084 elif disk_op == constants.DDM_ADD:
9086 if instance.disk_template == constants.DT_FILE:
9087 file_driver, file_path = instance.disks[0].logical_id
9088 file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
9091 disk_idx_base = len(instance.disks)
9092 new_disk = _GenerateDiskTemplate(self,
9093 instance.disk_template,
9094 instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
9100 instance.disks.append(new_disk)
9101 info = _GetInstanceInfoText(instance)
9103 logging.info("Creating volume %s for instance %s",
9104 new_disk.iv_name, instance.name)
9105 # Note: this needs to be kept in sync with _CreateDisks
9107 for node in instance.all_nodes:
9108 f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
9112 except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
9116 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9117 (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
9120 instance.disks[disk_op].mode = disk_dict['mode']
9121 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9123 if self.op.disk_template:
9124 r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
9134 result.append(("disk_template", self.op.disk_template))
9137 for nic_op, nic_dict in self.op.nics:
9138 if nic_op == constants.DDM_REMOVE:
9139 # remove the last nic
9140 del instance.nics[-1]
9141 result.append(("nic.%d" % len(instance.nics), "remove"))
9142 elif nic_op == constants.DDM_ADD:
9143 # mac and bridge should be set, by now
9144 mac = nic_dict['mac']
9145 ip = nic_dict.get('ip', None)
9146 nicparams = self.nic_pinst[constants.DDM_ADD]
9147 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9148 instance.nics.append(new_nic)
9149 result.append(("nic.%d" % (len(instance.nics) - 1),
9150 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9151 (new_nic.mac, new_nic.ip,
9152 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
9159 if nic_op in self.nic_pinst:
9160 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9161 for key, val in nic_dict.iteritems():
9162 result.append(("nic.%s/%d" % (key, nic_op), val))
9165 if self.op.hvparams:
9166 instance.hvparams = self.hv_inst
9167 for key, val in self.op.hvparams.iteritems():
9168 result.append(("hv/%s" % key, val))
9171 if self.op.beparams:
9172 instance.beparams = self.be_inst
9173 for key, val in self.op.beparams.iteritems():
9174 result.append(("be/%s" % key, val))
    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name
9181 if self.op.osparams:
9182 instance.osparams = self.os_inst
9183 for key, val in self.op.osparams.iteritems():
9184 result.append(("os/%s" % key, val))
    self.cfg.Update(instance, feedback_fn)

    return result
9190 _DISK_CONVERSIONS = {
9191 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
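
  # The table above is keyed by (current_template, new_template) pairs; Exec
  # looks up mode = (instance.disk_template, self.op.disk_template) in it,
  # so only plain<->drbd conversions are currently supported.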
9196 class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False
9206 def ExpandNames(self):
9207 self.needed_locks = {}
9208 self.share_locks[locking.LEVEL_NODE] = 1
9209 if not self.op.nodes:
9210 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9212 self.needed_locks[locking.LEVEL_NODE] = \
9213 _GetWantedNodes(self, self.op.nodes)
9215 def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
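    # Example result (hypothetical names):
    #   {"node1.example.com": ["inst1.example.com"],
    #    "node2.example.com": False}
    # where False marks a node whose export list could not be retrieved.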
9224 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9225 rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
9228 if rpcresult[node].fail_msg:
9229 result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
9236 class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
    ]
  REQ_BGL = False
9246 def ExpandNames(self):
9247 self._ExpandAndLockInstance()
9249 def CheckPrereq(self):
    """Check prerequisites.

    """
9253 instance_name = self.op.instance_name
9255 self.instance = self.cfg.GetInstanceInfo(instance_name)
9256 assert self.instance is not None, \
9257 "Cannot retrieve locked instance %s" % self.op.instance_name
9258 _CheckNodeOnline(self, self.instance.primary_node)
9260 self._cds = _GetClusterDomainSecret()
9262 def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
9266 instance = self.instance
9268 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9269 salt = utils.GenerateSecret(8)
9271 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9272 result = self.rpc.call_x509_cert_create(instance.primary_node,
9273 constants.RIE_CERT_VALIDITY)
9274 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9276 (name, cert_pem) = result.payload
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
9291 class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
    ("shutdown", True, _TBool),
    _PShutdownTimeout,
9302 ("remove_instance", False, _TBool),
9303 ("ignore_remove_failures", False, _TBool),
9304 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9305 ("x509_key_name", None, _TOr(_TList, _TNone)),
    ("destination_x509_ca", None, _TMaybeString),
    ]
  REQ_BGL = False
9310 def CheckArguments(self):
    """Check the arguments.

    """
9314 self.x509_key_name = self.op.x509_key_name
9315 self.dest_x509_ca_pem = self.op.destination_x509_ca
9317 if self.op.remove_instance and not self.op.shutdown:
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before")
9321 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9322 if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)
9326 if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)
9330 def ExpandNames(self):
9331 self._ExpandAndLockInstance()
9333 # Lock all nodes for local exports
9334 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9335 # FIXME: lock only instance primary and destination node
      # Sad but true, for now we have to lock all nodes, as we don't know where
9338 # the previous export might be, and in this LU we search for it and
9339 # remove it from its current node. In the future we could fix this by:
9340 # - making a tasklet to search (share-lock all), then create the
9341 # new one, then one to remove, after
9342 # - removing the removal operation altogether
9343 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9345 def DeclareLocks(self, level):
9346 """Last minute lock declaration."""
9347 # All nodes are locked anyway, so nothing to do here.
9349 def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
9356 "EXPORT_MODE": self.op.mode,
9357 "EXPORT_NODE": self.op.target_node,
9358 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9359 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9360 # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }
9364 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9366 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9368 if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl
9373 def CheckPrereq(self):
9374 """Check prerequisites.
    This checks that the instance and node names are valid.

    """
9379 instance_name = self.op.instance_name
9381 self.instance = self.cfg.GetInstanceInfo(instance_name)
9382 assert self.instance is not None, \
9383 "Cannot retrieve locked instance %s" % self.op.instance_name
9384 _CheckNodeOnline(self, self.instance.primary_node)
9386 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9387 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9388 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9389 assert self.dst_node is not None
9391 _CheckNodeOnline(self, self.dst_node.name)
9392 _CheckNodeNotDrained(self, self.dst_node.name)
      self._cds = None
      self.dest_disk_info = None
9396 self.dest_x509_ca = None
9398 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9399 self.dst_node = None
9401 if len(self.op.target_node) != len(self.instance.disks):
9402 raise errors.OpPrereqError(("Received destination information for %s"
9403 " disks, but instance %s has %s disks") %
9404 (len(self.op.target_node), instance_name,
                                   len(self.instance.disks)),
                                   errors.ECODE_INVAL)
9408 cds = _GetClusterDomainSecret()
9410 # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9413 except (TypeError, ValueError), err:
9414 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9416 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)
9420 # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9423 except OpenSSL.crypto.Error, err:
9424 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9425 (err, ), errors.ECODE_INVAL)
9427 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9428 if errcode is not None:
9429 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9430 (msg, ), errors.ECODE_INVAL)
9432 self.dest_x509_ca = cert
9434 # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
9439 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9440 except errors.GenericError, err:
9441 raise errors.OpPrereqError("Target info for disk %s: %s" %
9442 (idx, err), errors.ECODE_INVAL)
9444 disk_info.append((host, port, magic))
9446 assert len(disk_info) == len(self.op.target_node)
9447 self.dest_disk_info = disk_info
    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)
9453 # instance disk type verification
9454 # TODO: Implement export support for file-based disks
9455 for disk in self.instance.disks:
9456 if disk.dev_type == constants.LD_FILE:
9457 raise errors.OpPrereqError("Export not supported for instances with"
9458 " file-based disks", errors.ECODE_INVAL)
9460 def _CleanupExports(self, feedback_fn):
9461 """Removes exports of current instance from all other nodes.
9463 If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
9467 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9469 nodelist = self.cfg.GetNodeList()
9470 nodelist.remove(self.dst_node.name)
9472 # on one-node clusters nodelist will be empty after the removal
9473 # if we proceed the backup would be removed because OpQueryExports
9474 # substitutes an empty list with the full cluster node list.
9475 iname = self.instance.name
9477 feedback_fn("Removing old exports for instance %s" % iname)
9478 exportlist = self.rpc.call_export_list(nodelist)
9479 for node in exportlist:
9480 if exportlist[node].fail_msg:
9482 if iname in exportlist[node].payload:
9483 msg = self.rpc.call_export_remove(node, iname).fail_msg
9485 self.LogWarning("Could not remove older export for instance %s"
9486 " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
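
  # For reference (illustrative, derived from the code above): on success
  # Exec returns a tuple such as (True, [True, True]), i.e. the export
  # finalization status plus one boolean per instance disk; the asserts
  # above enforce exactly this shape.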


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " domain name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
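
  # Example (illustrative): searching for the pattern "web" could return
  # (path, tag) pairs such as
  #   [("/instances/inst1.example.com", "webserver"),
  #    ("/nodes/node1.example.com", "web-zone")]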


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
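
  # Illustrative behaviour: repeat=0 runs the delay exactly once without
  # per-iteration logging; repeat=3 runs it three times, logging
  # "Test delay iteration 0/2" through "2/2" before each sleep.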


class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, _TBool),
    ("notify_exec", False, _TBool),
    ("log_messages", _EmptyList, _TListOf(_TString)),
    ("fail", False, _TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
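
  # Client-side sketch (hypothetical test client, for illustration only):
  #
  #   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   client.connect(sockname)  # unblocks sock.accept() above
  #   ...                       # perform the checks for this notification
  #   client.close()            # unblocks conn.recv(1) above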

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
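
  # Example usage (illustrative sketch; names and values are made up):
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com",
  #                    mem_size=2048,
  #                    disks=[{"size": 1024, "mode": "w"}],
  #                    disk_template=constants.DT_DRBD8,
  #                    os="debootstrap", tags=[], nics=[], vcpus=1,
  #                    hypervisor=None)
  #
  # The keyword arguments must match the mode's keyset exactly (_ALLO_KEYS
  # here); missing or extra keys raise ProgrammerError in the loops above.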

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
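
  # Abbreviated illustration of the structure built above (self.in_data):
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "...",
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodes": {"node1.example.com": {"total_memory": ..., ...}, ...},
  #     "instances": {"inst1.example.com": {"memory": ..., ...}, ...},
  #   }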

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
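
  # The serialized input then carries the mode-specific payload under the
  # "request" key, with "type" set to the mode; e.g. (illustrative) for an
  # allocation:
  #
  #   "request": {
  #     "type": constants.IALLOCATOR_MODE_ALLOC,
  #     "name": "inst1.example.com",
  #     "required_nodes": 2,
  #     "disk_space_total": ...,
  #     ...
  #     }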

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
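
  # A well-formed allocator reply therefore looks like (illustrative):
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node2.example.com", "node3.example.com"]}
  #
  # with "nodes" still accepted as a legacy alias for "result", as handled
  # above.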


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text