# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

import copy
import logging

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Modifiable default values; need to define these here before the
# actual LUs


def _EmptyList():
  """Returns an empty list.

  """
  return []


def _EmptyDict():
  """Returns an empty dict.

  """
  return {}


#: The without-default default value
_NoDefault = object()


#: The no-type (value too complex to check it in the type system)
_NoType = object()


# Some basic types
def _TNotNone(val):
  """Checks if the given value is not None.

  """
  return val is not None


def _TNone(val):
  """Checks if the given value is None.

  """
  return val is None


def _TBool(val):
  """Checks if the given value is a boolean.

  """
  return isinstance(val, bool)


def _TInt(val):
  """Checks if the given value is an integer.

  """
  return isinstance(val, int)


def _TFloat(val):
  """Checks if the given value is a float.

  """
  return isinstance(val, float)


def _TString(val):
  """Checks if the given value is a string.

  """
  return isinstance(val, basestring)


def _TTrue(val):
  """Checks if a given value evaluates to a boolean True value.

  """
  return bool(val)


def _TElemOf(target_list):
  """Builds a function that checks if a given value is a member of a list.

  """
  return lambda val: val in target_list


# Container types
def _TList(val):
  """Checks if the given value is a list.

  """
  return isinstance(val, list)


def _TDict(val):
  """Checks if the given value is a dictionary.

  """
  return isinstance(val, dict)


# Combinator types
def _TAnd(*args):
  """Combine multiple functions using an AND operation.

  """
  def fn(val):
    return compat.all(t(val) for t in args)
  return fn


def _TOr(*args):
  """Combine multiple functions using an OR operation.

  """
  def fn(val):
    return compat.any(t(val) for t in args)
  return fn


# Type aliases

#: a non-empty string
_TNonEmptyString = _TAnd(_TString, _TTrue)


#: a maybe non-empty string
_TMaybeString = _TOr(_TNonEmptyString, _TNone)


#: a maybe boolean (bool or none)
_TMaybeBool = _TOr(_TBool, _TNone)


#: a positive integer (zero is allowed)
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)

#: a strictly positive integer
_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)


def _TListOf(my_type):
  """Checks if a given value is a list with all elements of the same type.

  """
  return _TAnd(_TList,
               lambda lst: compat.all(my_type(v) for v in lst))


def _TDictOf(key_type, val_type):
  """Checks a dict type for the type of its key/values.

  """
  return _TAnd(_TDict,
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
                                and compat.all(val_type(v)
                                               for v in my_dict.values())))
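

# Illustrative example (not part of the original module): the checkers above
# compose into arbitrarily nested validators. Assuming only the definitions
# in this module:
#
#   _TStrIntListMap = _TDictOf(_TNonEmptyString, _TListOf(_TInt))
#   _TStrIntListMap({"a": [1, 2]})   # => True
#   _TStrIntListMap({"": [1]})       # => False (empty key)
#   _TStrIntListMap({"a": "nope"})   # => False (value is not a list of ints)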


# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))


#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     _TPositiveInt)

#: the force parameter
_PForce = ("force", False, _TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)

#: a required node name (for single-node LUs)
_PNodeName = ("node_name", _NoDefault, _TNonEmptyString)

#: the migration type (live/non-live)
_PMigrationMode = ("mode", None, _TOr(_TNone,
                                      _TElemOf(constants.HT_MIGRATION_MODES)))
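

# Illustrative example (not part of the original module): each _P* tuple is
# (attribute name, default value or _NoDefault, type check) and is consumed
# by LogicalUnit.__init__ below. A hypothetical LU would declare:
#
#   class LUExampleOp(LogicalUnit):
#     _OP_PARAMS = [
#       _PInstanceName,                  # required, must be a non-empty string
#       _PForce,                         # optional, defaults to False
#       ("timeout", 60, _TPositiveInt),  # optional, validated as int >= 0
#       ]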


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
      they should get if not already defined, and the types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == _NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if aval == _NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()
332 """Returns the SshRunner object
336 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
339 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_' as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
594 """Tasklet base class.
596 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
597 they can mix legacy code with tasklets. Locking needs to be done in the LU,
598 tasklets know nothing about locks.
600 Subclasses must follow these rules:
601 - Implement CheckPrereq
605 def __init__(self, lu):
612 def CheckPrereq(self):
613 """Check prerequisites for this tasklets.
615 This method should check whether the prerequisites for the execution of
616 this tasklet are fulfilled. It can do internode communication, but it
617 should be idempotent - no cluster or system changes are allowed.
619 The method should raise errors.OpPrereqError in case something is not
620 fulfilled. Its return value is ignored.
622 This method should also update all parameters to their canonical form if it
623 hasn't been done before.
628 def Exec(self, feedback_fn):
629 """Execute the tasklet.
631 This method should implement the actual work. It should raise
632 errors.OpExecError for failures that are somewhat dealt with in code, or
636 raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
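

# Illustrative example (not part of the original module): resetting one key
# and overriding another, assuming the definitions above:
#
#   old = {"mem": 512, "vcpus": 2}
#   _GetUpdatedParams(old, {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
#   # => {"vcpus": 4} -- "mem" reverts to the cluster default via deletion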


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
  return True


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
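

# Illustrative example (not part of the original module): expansion turns a
# short name into the canonical one, or raises OpPrereqError. The names are
# hypothetical:
#
#   _ExpandInstanceName(cfg, "web1")   # => "web1.example.com" if configured
#   _ExpandInstanceName(cfg, "bogus")  # => raises "Instance 'bogus' not known"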


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
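

# Illustrative example (not part of the original module): for an instance
# with one bridged NIC, the environment built above would contain, among
# others:
#
#   INSTANCE_NAME=inst1.example.com
#   INSTANCE_STATUS=up
#   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged
#   INSTANCE_NIC0_BRIDGE=<the NIC's link>
#
# The hooks runner later prefixes these keys with "GANETI_" (see
# LogicalUnit.BuildHooksEnv above).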


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
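

# Illustrative example (not part of the original module): on a hypothetical
# three-node cluster with candidate_pool_size = 10, where all three nodes are
# already candidates (mc_now = mc_should = 3), adding a node gives
# mc_should = min(3 + 1, 10) = 4, so mc_now (3) < 4 and the new node
# promotes itself.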


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", _EmptyList,
     _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, _TBool),
    ("error_codes", False, _TBool),
    ("debug_simulate_errors", False, _TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)
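
  # Illustrative example (not part of the original module): the same error
  # reported through _Error in both formats, for a hypothetical node "node1":
  #
  #   with error_codes: "ERROR:ENODELVM:node:node1:unable to check volume
  #                      groups"
  #   without:          "ERROR: node node1: unable to check volume groups"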

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # file missing on the node
      test1 = file_name not in remote_cksum
      # file present but with a wrong checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # file present with the correct checksum
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
1947 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1948 """Verifies and updates the node instance list.
1950 If the listing was successful, then updates this node's instance
1951 list. Otherwise, it marks the RPC call as failed for the instance
1954 @type ninfo: L{objects.Node}
1955 @param ninfo: the node to check
1956 @param nresult: the remote results for the node
1957 @param nimg: the node image object
1960 idata = nresult.get(constants.NV_INSTANCELIST, None)
1961 test = not isinstance(idata, list)
1962 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1963 " (instancelist): %s", utils.SafeEncode(str(idata)))
1964 if test:
1965 nimg.hyp_fail = True
1966 else:
1967 nimg.instances = idata
1969 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1970 """Verifies and computes a node information map
1972 @type ninfo: L{objects.Node}
1973 @param ninfo: the node to check
1974 @param nresult: the remote results for the node
1975 @param nimg: the node image object
1976 @param vg_name: the configured VG name
1980 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1981 node = ninfo.name
1982 # try to read free memory (from the hypervisor)
1983 hv_info = nresult.get(constants.NV_HVINFO, None)
1984 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1985 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1986 if not test:
1987 try:
1988 nimg.mfree = int(hv_info["memory_free"])
1989 except (ValueError, TypeError):
1990 _ErrorIf(True, self.ENODERPC, node,
1991 "node returned invalid nodeinfo, check hypervisor")
1993 # FIXME: devise a free space model for file based instances as well
1994 if vg_name is not None:
1995 test = (constants.NV_VGLIST not in nresult or
1996 vg_name not in nresult[constants.NV_VGLIST])
1997 _ErrorIf(test, self.ENODELVM, node,
1998 "node didn't return data for the volume group '%s'"
1999 " - it is either missing or broken", vg_name)
2000 if not test:
2001 try:
2002 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2003 except (ValueError, TypeError):
2004 _ErrorIf(True, self.ENODERPC, node,
2005 "node returned invalid LVM info, check LVM status")
2007 def BuildHooksEnv(self):
2010 Cluster-Verify hooks are run only in the post phase; if they fail, their
2011 output is logged in the verify output and the verification fails.
2014 all_nodes = self.cfg.GetNodeList()
2016 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2018 for node in self.cfg.GetAllNodesInfo().values():
2019 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2021 return env, [], all_nodes
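# Illustrative note (added; tag values are hypothetical): the environment
# built above looks like
#   {"CLUSTER_TAGS": "prod web", "NODE_TAGS_node1": "drbd ssd"}
# i.e. the cluster tags plus one space-joined tag string per node.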
2023 def Exec(self, feedback_fn):
2024 """Verify integrity of cluster, performing various test on nodes.
2028 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2029 verbose = self.op.verbose
2030 self._feedback_fn = feedback_fn
2031 feedback_fn("* Verifying global settings")
2032 for msg in self.cfg.VerifyConfig():
2033 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2035 # Check the cluster certificates
2036 for cert_filename in constants.ALL_CERT_FILES:
2037 (errcode, msg) = _VerifyCertificate(cert_filename)
2038 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2040 vg_name = self.cfg.GetVGName()
2041 drbd_helper = self.cfg.GetDRBDHelper()
2042 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2043 cluster = self.cfg.GetClusterInfo()
2044 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2045 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2046 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2047 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2048 for iname in instancelist)
2049 i_non_redundant = [] # Non redundant instances
2050 i_non_a_balanced = [] # Non auto-balanced instances
2051 n_offline = 0 # Count of offline nodes
2052 n_drained = 0 # Count of nodes being drained
2053 node_vol_should = {}
2055 # FIXME: verify OS list
2056 # do local checksums
2057 master_files = [constants.CLUSTER_CONF_FILE]
2058 master_node = self.master_node = self.cfg.GetMasterNode()
2059 master_ip = self.cfg.GetMasterIP()
2061 file_names = ssconf.SimpleStore().GetFileList()
2062 file_names.extend(constants.ALL_CERT_FILES)
2063 file_names.extend(master_files)
2064 if cluster.modify_etc_hosts:
2065 file_names.append(constants.ETC_HOSTS)
2067 local_checksums = utils.FingerprintFiles(file_names)
2069 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2070 node_verify_param = {
2071 constants.NV_FILELIST: file_names,
2072 constants.NV_NODELIST: [node.name for node in nodeinfo
2073 if not node.offline],
2074 constants.NV_HYPERVISOR: hypervisors,
2075 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2076 node.secondary_ip) for node in nodeinfo
2077 if not node.offline],
2078 constants.NV_INSTANCELIST: hypervisors,
2079 constants.NV_VERSION: None,
2080 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2081 constants.NV_NODESETUP: None,
2082 constants.NV_TIME: None,
2083 constants.NV_MASTERIP: (master_node, master_ip),
2084 constants.NV_OSLIST: None,
2085 }
2087 if vg_name is not None:
2088 node_verify_param[constants.NV_VGLIST] = None
2089 node_verify_param[constants.NV_LVLIST] = vg_name
2090 node_verify_param[constants.NV_PVLIST] = [vg_name]
2091 node_verify_param[constants.NV_DRBDLIST] = None
2093 if drbd_helper:
2094 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2096 # Build our expected cluster state
2097 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2098 name=node.name))
2099 for node in nodeinfo)
2101 for instance in instancelist:
2102 inst_config = instanceinfo[instance]
2104 for nname in inst_config.all_nodes:
2105 if nname not in node_image:
2106 # ghost node
2107 gnode = self.NodeImage(name=nname)
2108 gnode.ghost = True
2109 node_image[nname] = gnode
2111 inst_config.MapLVsByNode(node_vol_should)
2113 pnode = inst_config.primary_node
2114 node_image[pnode].pinst.append(instance)
2116 for snode in inst_config.secondary_nodes:
2117 nimg = node_image[snode]
2118 nimg.sinst.append(instance)
2119 if pnode not in nimg.sbp:
2120 nimg.sbp[pnode] = []
2121 nimg.sbp[pnode].append(instance)
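# Illustrative note (added; instance/node names are hypothetical): for an
# instance "inst1" with primary "node1" and secondary "node2", this loop
# yields
#   node_image["node1"].pinst == ["inst1"]
#   node_image["node2"].sinst == ["inst1"]
#   node_image["node2"].sbp == {"node1": ["inst1"]}
# i.e. sbp groups a secondary node's instances by their primary node.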
2123 # At this point, we have the in-memory data structures complete,
2124 # except for the runtime information, which we'll gather next
2126 # Due to the way our RPC system works, exact response times cannot be
2127 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2128 # time before and after executing the request, we can at least have a time
2130 nvinfo_starttime = time.time()
2131 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2132 self.cfg.GetClusterName())
2133 nvinfo_endtime = time.time()
2135 all_drbd_map = self.cfg.ComputeDRBDMap()
2137 feedback_fn("* Verifying node status")
2139 refos_img = None
2141 for node_i in nodeinfo:
2142 node = node_i.name
2143 nimg = node_image[node]
2147 feedback_fn("* Skipping offline node %s" % (node,))
2151 if node == master_node:
2152 ntype = "master"
2153 elif node_i.master_candidate:
2154 ntype = "master candidate"
2155 elif node_i.drained:
2156 ntype = "drained"
2157 n_drained += 1
2158 else:
2159 ntype = "regular"
2160 if verbose:
2161 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2163 msg = all_nvinfo[node].fail_msg
2164 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2165 if msg:
2166 nimg.rpc_fail = True
2167 continue
2169 nresult = all_nvinfo[node].payload
2171 nimg.call_ok = self._VerifyNode(node_i, nresult)
2172 self._VerifyNodeNetwork(node_i, nresult)
2173 self._VerifyNodeLVM(node_i, nresult, vg_name)
2174 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2175 master_files)
2176 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2177 all_drbd_map)
2178 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2180 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2181 self._UpdateNodeInstances(node_i, nresult, nimg)
2182 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2183 self._UpdateNodeOS(node_i, nresult, nimg)
2184 if not nimg.os_fail:
2185 if refos_img is None:
2186 refos_img = nimg
2187 self._VerifyNodeOS(node_i, nimg, refos_img)
2189 feedback_fn("* Verifying instance status")
2190 for instance in instancelist:
2191 if verbose:
2192 feedback_fn("* Verifying instance %s" % instance)
2193 inst_config = instanceinfo[instance]
2194 self._VerifyInstance(instance, inst_config, node_image)
2195 inst_nodes_offline = []
2197 pnode = inst_config.primary_node
2198 pnode_img = node_image[pnode]
2199 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2200 self.ENODERPC, pnode, "instance %s, connection to"
2201 " primary node failed", instance)
2203 if pnode_img.offline:
2204 inst_nodes_offline.append(pnode)
2206 # If the instance is non-redundant we cannot survive losing its primary
2207 # node, so we are not N+1 compliant. On the other hand we have no disk
2208 # templates with more than one secondary so that situation is not well
2209 # supported either.
2210 # FIXME: does not support file-backed instances
2211 if not inst_config.secondary_nodes:
2212 i_non_redundant.append(instance)
2213 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2214 instance, "instance has multiple secondary nodes: %s",
2215 utils.CommaJoin(inst_config.secondary_nodes),
2216 code=self.ETYPE_WARNING)
2218 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2219 i_non_a_balanced.append(instance)
2221 for snode in inst_config.secondary_nodes:
2222 s_img = node_image[snode]
2223 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2224 "instance %s, connection to secondary node failed", instance)
2226 if s_img.offline:
2227 inst_nodes_offline.append(snode)
2229 # warn that the instance lives on offline nodes
2230 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2231 "instance lives on offline node(s) %s",
2232 utils.CommaJoin(inst_nodes_offline))
2233 # ... or ghost nodes
2234 for node in inst_config.all_nodes:
2235 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2236 "instance lives on ghost node %s", node)
2238 feedback_fn("* Verifying orphan volumes")
2239 reserved = utils.FieldSet(*cluster.reserved_lvs)
2240 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2242 feedback_fn("* Verifying orphan instances")
2243 self._VerifyOrphanInstances(instancelist, node_image)
2245 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2246 feedback_fn("* Verifying N+1 Memory redundancy")
2247 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2249 feedback_fn("* Other Notes")
2250 if i_non_redundant:
2251 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2252 % len(i_non_redundant))
2254 if i_non_a_balanced:
2255 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2256 % len(i_non_a_balanced))
2258 if n_offline:
2259 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2261 if n_drained:
2262 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2264 return not self.bad
2266 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2267 """Analyze the post-hooks' result
2269 This method analyses the hook result, handles it, and sends some
2270 nicely-formatted feedback back to the user.
2272 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2273 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2274 @param hooks_results: the results of the multi-node hooks rpc call
2275 @param feedback_fn: function used to send feedback back to the caller
2276 @param lu_result: previous Exec result
2277 @return: the new Exec result, based on the previous result
2281 # We only really run POST phase hooks, and are only interested in
2282 # their results
2283 if phase == constants.HOOKS_PHASE_POST:
2284 # Used to change hooks' output to proper indentation
2285 indent_re = re.compile('^', re.M)
2286 feedback_fn("* Hooks Results")
2287 assert hooks_results, "invalid result from hooks"
2289 for node_name in hooks_results:
2290 res = hooks_results[node_name]
2291 msg = res.fail_msg
2292 test = msg and not res.offline
2293 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2294 "Communication failure in hooks execution: %s", msg)
2295 if res.offline or msg:
2296 # No need to investigate payload if node is offline or gave an error.
2297 # override manually lu_result here as _ErrorIf only
2298 # overrides self.bad
2299 lu_result = 1
2300 continue
2301 for script, hkr, output in res.payload:
2302 test = hkr == constants.HKR_FAIL
2303 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2304 "Script %s failed, output:", script)
2305 if test:
2306 output = indent_re.sub(' ', output)
2307 feedback_fn("%s" % output)
2308 lu_result = 0
2310 return lu_result
2313 class LUVerifyDisks(NoHooksLU):
2314 """Verifies the cluster disks status.
2319 def ExpandNames(self):
2320 self.needed_locks = {
2321 locking.LEVEL_NODE: locking.ALL_SET,
2322 locking.LEVEL_INSTANCE: locking.ALL_SET,
2324 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2326 def Exec(self, feedback_fn):
2327 """Verify integrity of cluster disks.
2329 @rtype: tuple of three items
2330 @return: a tuple of (dict of node-to-node_error, list of instances
2331 which need activate-disks, dict of instance: (node, volume) for
2332 missing volumes
2335 result = res_nodes, res_instances, res_missing = {}, [], {}
2337 vg_name = self.cfg.GetVGName()
2338 nodes = utils.NiceSort(self.cfg.GetNodeList())
2339 instances = [self.cfg.GetInstanceInfo(name)
2340 for name in self.cfg.GetInstanceList()]
2342 nv_dict = {}
2343 for inst in instances:
2344 inst_lvs = {}
2345 if (not inst.admin_up or
2346 inst.disk_template not in constants.DTS_NET_MIRROR):
2347 continue
2348 inst.MapLVsByNode(inst_lvs)
2349 # transform each instance's {node: [vol, ...]} map into {(node, vol): inst}
2350 for node, vol_list in inst_lvs.iteritems():
2351 for vol in vol_list:
2352 nv_dict[(node, vol)] = inst
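# Illustrative sketch (added; data is hypothetical): the loop above
# flattens a per-instance {node: [vol, ...]} map into a reverse
# (node, vol) index:
#
#   >>> inst_lvs = {"node1": ["xenvg/lv0"], "node2": ["xenvg/lv0"]}
#   >>> nv = dict(((node, vol), "inst1")
#   ...           for node, vols in inst_lvs.items() for vol in vols)
#   >>> sorted(nv)
#   [('node1', 'xenvg/lv0'), ('node2', 'xenvg/lv0')]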
2357 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2360 for node in nodes:
2361 node_res = node_lvs[node]
2362 if node_res.offline:
2363 continue
2364 msg = node_res.fail_msg
2365 if msg:
2366 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2367 res_nodes[node] = msg
2368 continue
2370 lvs = node_res.payload
2371 for lv_name, (_, _, lv_online) in lvs.items():
2372 inst = nv_dict.pop((node, lv_name), None)
2373 if (not lv_online and inst is not None
2374 and inst.name not in res_instances):
2375 res_instances.append(inst.name)
2377 # any leftover items in nv_dict are missing LVs, let's arrange the
2378 # data better
2379 for key, inst in nv_dict.iteritems():
2380 if inst.name not in res_missing:
2381 res_missing[inst.name] = []
2382 res_missing[inst.name].append(key)
2384 return result
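# Illustrative note (added; values are hypothetical): the result built
# above then looks like
#   ({"node3": "rpc error"}, ["inst2"], {"inst1": [("node1", "xenvg/lv0")]})
# i.e. per-node errors, instances needing activate-disks, missing LVs.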
2387 class LURepairDiskSizes(NoHooksLU):
2388 """Verifies the cluster disks sizes.
2391 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2394 def ExpandNames(self):
2395 if self.op.instances:
2396 self.wanted_names = []
2397 for name in self.op.instances:
2398 full_name = _ExpandInstanceName(self.cfg, name)
2399 self.wanted_names.append(full_name)
2400 self.needed_locks = {
2401 locking.LEVEL_NODE: [],
2402 locking.LEVEL_INSTANCE: self.wanted_names,
2404 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2405 else:
2406 self.wanted_names = None
2407 self.needed_locks = {
2408 locking.LEVEL_NODE: locking.ALL_SET,
2409 locking.LEVEL_INSTANCE: locking.ALL_SET,
2411 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2413 def DeclareLocks(self, level):
2414 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2415 self._LockInstancesNodes(primary_only=True)
2417 def CheckPrereq(self):
2418 """Check prerequisites.
2420 This only checks the optional instance list against the existing names.
2423 if self.wanted_names is None:
2424 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2426 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2427 in self.wanted_names]
2429 def _EnsureChildSizes(self, disk):
2430 """Ensure children of the disk have the needed disk size.
2432 This is valid mainly for DRBD8 and fixes an issue where the
2433 children have a smaller disk size than the parent.
2435 @param disk: an L{ganeti.objects.Disk} object
2438 if disk.dev_type == constants.LD_DRBD8:
2439 assert disk.children, "Empty children for DRBD8?"
2440 fchild = disk.children[0]
2441 mismatch = fchild.size < disk.size
2442 if mismatch:
2443 self.LogInfo("Child disk has size %d, parent %d, fixing",
2444 fchild.size, disk.size)
2445 fchild.size = disk.size
2447 # and we recurse on this child only, not on the metadev
2448 return self._EnsureChildSizes(fchild) or mismatch
2449 else:
2450 return False
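# Illustrative note (added; sizes are hypothetical): for a DRBD8 disk of
# size 10240 whose data child reports 10200, the helper grows the child to
# 10240 and returns True so the caller knows the configuration changed.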
2452 def Exec(self, feedback_fn):
2453 """Verify the size of cluster disks.
2456 # TODO: check child disks too
2457 # TODO: check differences in size between primary/secondary nodes
2458 per_node_disks = {}
2459 for instance in self.wanted_instances:
2460 pnode = instance.primary_node
2461 if pnode not in per_node_disks:
2462 per_node_disks[pnode] = []
2463 for idx, disk in enumerate(instance.disks):
2464 per_node_disks[pnode].append((instance, idx, disk))
2466 changed = []
2467 for node, dskl in per_node_disks.items():
2468 newl = [v[2].Copy() for v in dskl]
2469 for dsk in newl:
2470 self.cfg.SetDiskID(dsk, node)
2471 result = self.rpc.call_blockdev_getsizes(node, newl)
2472 if result.fail_msg:
2473 self.LogWarning("Failure in blockdev_getsizes call to node"
2474 " %s, ignoring", node)
2475 continue
2476 if len(result.data) != len(dskl):
2477 self.LogWarning("Invalid result from node %s, ignoring node results",
2478 node)
2479 continue
2480 for ((instance, idx, disk), size) in zip(dskl, result.data):
2481 if size is None:
2482 self.LogWarning("Disk %d of instance %s did not return size"
2483 " information, ignoring", idx, instance.name)
2484 continue
2485 if not isinstance(size, (int, long)):
2486 self.LogWarning("Disk %d of instance %s did not return valid"
2487 " size information, ignoring", idx, instance.name)
2488 continue
2489 size = size >> 20
2490 if size != disk.size:
2491 self.LogInfo("Disk %d of instance %s has mismatched size,"
2492 " correcting: recorded %d, actual %d", idx,
2493 instance.name, disk.size, size)
2494 disk.size = size
2495 self.cfg.Update(instance, feedback_fn)
2496 changed.append((instance.name, idx, size))
2497 if self._EnsureChildSizes(disk):
2498 self.cfg.Update(instance, feedback_fn)
2499 changed.append((instance.name, idx, disk.size))
2501 return changed
2503 class LURenameCluster(LogicalUnit):
2504 """Rename the cluster.
2507 HPATH = "cluster-rename"
2508 HTYPE = constants.HTYPE_CLUSTER
2509 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2511 def BuildHooksEnv(self):
2516 "OP_TARGET": self.cfg.GetClusterName(),
2517 "NEW_NAME": self.op.name,
2518 }
2519 mn = self.cfg.GetMasterNode()
2520 all_nodes = self.cfg.GetNodeList()
2521 return env, [mn], all_nodes
2523 def CheckPrereq(self):
2524 """Verify that the passed name is a valid one.
2527 hostname = netutils.GetHostInfo(self.op.name)
2529 new_name = hostname.name
2530 self.ip = new_ip = hostname.ip
2531 old_name = self.cfg.GetClusterName()
2532 old_ip = self.cfg.GetMasterIP()
2533 if new_name == old_name and new_ip == old_ip:
2534 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2535 " cluster has changed",
2537 if new_ip != old_ip:
2538 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2539 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2540 " reachable on the network. Aborting." %
2541 new_ip, errors.ECODE_NOTUNIQUE)
2543 self.op.name = new_name
2545 def Exec(self, feedback_fn):
2546 """Rename the cluster.
2549 clustername = self.op.name
2550 ip = self.ip
2552 # shutdown the master IP
2553 master = self.cfg.GetMasterNode()
2554 result = self.rpc.call_node_stop_master(master, False)
2555 result.Raise("Could not disable the master role")
2557 try:
2558 cluster = self.cfg.GetClusterInfo()
2559 cluster.cluster_name = clustername
2560 cluster.master_ip = ip
2561 self.cfg.Update(cluster, feedback_fn)
2563 # update the known hosts file
2564 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2565 node_list = self.cfg.GetNodeList()
2566 try:
2567 node_list.remove(master)
2568 except ValueError:
2569 pass
2570 result = self.rpc.call_upload_file(node_list,
2571 constants.SSH_KNOWN_HOSTS_FILE)
2572 for to_node, to_result in result.iteritems():
2573 msg = to_result.fail_msg
2574 if msg:
2575 msg = ("Copy of file %s to node %s failed: %s" %
2576 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2577 self.proc.LogWarning(msg)
2579 finally:
2580 result = self.rpc.call_node_start_master(master, False, False)
2581 msg = result.fail_msg
2582 if msg:
2583 self.LogWarning("Could not re-enable the master role on"
2584 " the master, please restart manually: %s", msg)
2587 class LUSetClusterParams(LogicalUnit):
2588 """Change the parameters of the cluster.
2591 HPATH = "cluster-modify"
2592 HTYPE = constants.HTYPE_CLUSTER
2594 ("vg_name", None, _TMaybeString),
2595 ("enabled_hypervisors", None,
2596 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2597 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2598 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2599 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2600 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2601 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2602 ("uid_pool", None, _NoType),
2603 ("add_uids", None, _NoType),
2604 ("remove_uids", None, _NoType),
2605 ("maintain_node_health", None, _TMaybeBool),
2606 ("nicparams", None, _TOr(_TDict, _TNone)),
2607 ("drbd_helper", None, _TOr(_TString, _TNone)),
2608 ("default_iallocator", None, _TMaybeString),
2612 def CheckArguments(self):
2616 if self.op.uid_pool:
2617 uidpool.CheckUidPool(self.op.uid_pool)
2619 if self.op.add_uids:
2620 uidpool.CheckUidPool(self.op.add_uids)
2622 if self.op.remove_uids:
2623 uidpool.CheckUidPool(self.op.remove_uids)
2625 def ExpandNames(self):
2626 # FIXME: in the future maybe other cluster params won't require checking on
2627 # all nodes to be modified.
2628 self.needed_locks = {
2629 locking.LEVEL_NODE: locking.ALL_SET,
2631 self.share_locks[locking.LEVEL_NODE] = 1
2633 def BuildHooksEnv(self):
2638 "OP_TARGET": self.cfg.GetClusterName(),
2639 "NEW_VG_NAME": self.op.vg_name,
2640 }
2641 mn = self.cfg.GetMasterNode()
2642 return env, [mn], [mn]
2644 def CheckPrereq(self):
2645 """Check prerequisites.
2647 This checks whether the given params don't conflict and
2648 if the given volume group is valid.
2651 if self.op.vg_name is not None and not self.op.vg_name:
2652 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2653 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2654 " instances exist", errors.ECODE_INVAL)
2656 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2657 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2658 raise errors.OpPrereqError("Cannot disable drbd helper while"
2659 " drbd-based instances exist",
2662 node_list = self.acquired_locks[locking.LEVEL_NODE]
2664 # if vg_name not None, checks given volume group on all nodes
2665 if self.op.vg_name:
2666 vglist = self.rpc.call_vg_list(node_list)
2667 for node in node_list:
2668 msg = vglist[node].fail_msg
2669 if msg:
2670 # ignoring down node
2671 self.LogWarning("Error while gathering data on node %s"
2672 " (ignoring node): %s", node, msg)
2674 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2675 self.op.vg_name,
2676 constants.MIN_VG_SIZE)
2677 if vgstatus:
2678 raise errors.OpPrereqError("Error on node '%s': %s" %
2679 (node, vgstatus), errors.ECODE_ENVIRON)
2681 if self.op.drbd_helper:
2682 # checks given drbd helper on all nodes
2683 helpers = self.rpc.call_drbd_helper(node_list)
2684 for node in node_list:
2685 ninfo = self.cfg.GetNodeInfo(node)
2686 if ninfo.offline:
2687 self.LogInfo("Not checking drbd helper on offline node %s", node)
2688 continue
2689 msg = helpers[node].fail_msg
2690 if msg:
2691 raise errors.OpPrereqError("Error checking drbd helper on node"
2692 " '%s': %s" % (node, msg),
2693 errors.ECODE_ENVIRON)
2694 node_helper = helpers[node].payload
2695 if node_helper != self.op.drbd_helper:
2696 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2697 (node, node_helper), errors.ECODE_ENVIRON)
2699 self.cluster = cluster = self.cfg.GetClusterInfo()
2700 # validate params changes
2701 if self.op.beparams:
2702 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2703 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2705 if self.op.nicparams:
2706 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2707 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2708 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2710 nic_errors = []
2711 # check all instances for consistency
2712 for instance in self.cfg.GetAllInstancesInfo().values():
2713 for nic_idx, nic in enumerate(instance.nics):
2714 params_copy = copy.deepcopy(nic.nicparams)
2715 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2717 # check parameter syntax
2718 try:
2719 objects.NIC.CheckParameterSyntax(params_filled)
2720 except errors.ConfigurationError, err:
2721 nic_errors.append("Instance %s, nic/%d: %s" %
2722 (instance.name, nic_idx, err))
2724 # if we're moving instances to routed, check that they have an ip
2725 target_mode = params_filled[constants.NIC_MODE]
2726 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2727 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip" %
2728 (instance.name, nic_idx))
2729 if nic_errors:
2730 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2731 "\n".join(nic_errors))
2733 # hypervisor list/parameters
2734 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2735 if self.op.hvparams:
2736 for hv_name, hv_dict in self.op.hvparams.items():
2737 if hv_name not in self.new_hvparams:
2738 self.new_hvparams[hv_name] = hv_dict
2739 else:
2740 self.new_hvparams[hv_name].update(hv_dict)
2742 # os hypervisor parameters
2743 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2744 if self.op.os_hvp:
2745 for os_name, hvs in self.op.os_hvp.items():
2746 if os_name not in self.new_os_hvp:
2747 self.new_os_hvp[os_name] = hvs
2748 else:
2749 for hv_name, hv_dict in hvs.items():
2750 if hv_name not in self.new_os_hvp[os_name]:
2751 self.new_os_hvp[os_name][hv_name] = hv_dict
2752 else:
2753 self.new_os_hvp[os_name][hv_name].update(hv_dict)
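# Illustrative sketch (added; values are hypothetical): the two-level
# merge above only overlays the keys present in the update, keeping
# unrelated settings:
#
#   >>> merged = {"xen-pvm": {"kernel_path": "/boot/k1"}}
#   >>> merged["xen-pvm"].update({"root_path": "/dev/xvda1"})
#   >>> sorted(merged["xen-pvm"])
#   ['kernel_path', 'root_path']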
2756 self.new_osp = objects.FillDict(cluster.osparams, {})
2757 if self.op.osparams:
2758 for os_name, osp in self.op.osparams.items():
2759 if os_name not in self.new_osp:
2760 self.new_osp[os_name] = {}
2762 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2763 use_none=True)
2765 if not self.new_osp[os_name]:
2766 # we removed all parameters
2767 del self.new_osp[os_name]
2768 else:
2769 # check the parameter validity (remote check)
2770 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2771 os_name, self.new_osp[os_name])
2773 # changes to the hypervisor list
2774 if self.op.enabled_hypervisors is not None:
2775 self.hv_list = self.op.enabled_hypervisors
2776 for hv in self.hv_list:
2777 # if the hypervisor doesn't already exist in the cluster
2778 # hvparams, we initialize it to empty, and then (in both
2779 # cases) we make sure to fill the defaults, as we might not
2780 # have a complete defaults list if the hypervisor wasn't
2781 # enabled before
2782 if hv not in new_hvp:
2783 new_hvp[hv] = {}
2784 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2785 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2786 else:
2787 self.hv_list = cluster.enabled_hypervisors
2789 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2790 # either the enabled list has changed, or the parameters have, validate
2791 for hv_name, hv_params in self.new_hvparams.items():
2792 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2793 (self.op.enabled_hypervisors and
2794 hv_name in self.op.enabled_hypervisors)):
2795 # either this is a new hypervisor, or its parameters have changed
2796 hv_class = hypervisor.GetHypervisor(hv_name)
2797 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2798 hv_class.CheckParameterSyntax(hv_params)
2799 _CheckHVParams(self, node_list, hv_name, hv_params)
2801 if self.op.os_hvp:
2802 # no need to check any newly-enabled hypervisors, since the
2803 # defaults have already been checked in the above code-block
2804 for os_name, os_hvp in self.new_os_hvp.items():
2805 for hv_name, hv_params in os_hvp.items():
2806 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2807 # we need to fill in the new os_hvp on top of the actual hv_p
2808 cluster_defaults = self.new_hvparams.get(hv_name, {})
2809 new_osp = objects.FillDict(cluster_defaults, hv_params)
2810 hv_class = hypervisor.GetHypervisor(hv_name)
2811 hv_class.CheckParameterSyntax(new_osp)
2812 _CheckHVParams(self, node_list, hv_name, new_osp)
2814 if self.op.default_iallocator:
2815 alloc_script = utils.FindFile(self.op.default_iallocator,
2816 constants.IALLOCATOR_SEARCH_PATH,
2817 os.path.isfile)
2818 if alloc_script is None:
2819 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2820 " specified" % self.op.default_iallocator,
2823 def Exec(self, feedback_fn):
2824 """Change the parameters of the cluster.
2827 if self.op.vg_name is not None:
2828 new_volume = self.op.vg_name
2829 if not new_volume:
2830 new_volume = None
2831 if new_volume != self.cfg.GetVGName():
2832 self.cfg.SetVGName(new_volume)
2833 else:
2834 feedback_fn("Cluster LVM configuration already in desired"
2835 " state, not changing")
2836 if self.op.drbd_helper is not None:
2837 new_helper = self.op.drbd_helper
2838 if not new_helper:
2839 new_helper = None
2840 if new_helper != self.cfg.GetDRBDHelper():
2841 self.cfg.SetDRBDHelper(new_helper)
2842 else:
2843 feedback_fn("Cluster DRBD helper already in desired state,"
2844 " not changing")
2845 if self.op.hvparams:
2846 self.cluster.hvparams = self.new_hvparams
2847 if self.op.os_hvp:
2848 self.cluster.os_hvp = self.new_os_hvp
2849 if self.op.enabled_hypervisors is not None:
2850 self.cluster.hvparams = self.new_hvparams
2851 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2852 if self.op.beparams:
2853 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2854 if self.op.nicparams:
2855 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2856 if self.op.osparams:
2857 self.cluster.osparams = self.new_osp
2859 if self.op.candidate_pool_size is not None:
2860 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2861 # we need to update the pool size here, otherwise the save will fail
2862 _AdjustCandidatePool(self, [])
2864 if self.op.maintain_node_health is not None:
2865 self.cluster.maintain_node_health = self.op.maintain_node_health
2867 if self.op.add_uids is not None:
2868 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2870 if self.op.remove_uids is not None:
2871 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2873 if self.op.uid_pool is not None:
2874 self.cluster.uid_pool = self.op.uid_pool
2876 if self.op.default_iallocator is not None:
2877 self.cluster.default_iallocator = self.op.default_iallocator
2879 self.cfg.Update(self.cluster, feedback_fn)
2882 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2883 """Distribute additional files which are part of the cluster configuration.
2885 ConfigWriter takes care of distributing the config and ssconf files, but
2886 there are more files which should be distributed to all nodes. This function
2887 makes sure those are copied.
2889 @param lu: calling logical unit
2890 @param additional_nodes: list of nodes not in the config to distribute to
2893 # 1. Gather target nodes
2894 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2895 dist_nodes = lu.cfg.GetOnlineNodeList()
2896 if additional_nodes is not None:
2897 dist_nodes.extend(additional_nodes)
2898 if myself.name in dist_nodes:
2899 dist_nodes.remove(myself.name)
2901 # 2. Gather files to distribute
2902 dist_files = set([constants.ETC_HOSTS,
2903 constants.SSH_KNOWN_HOSTS_FILE,
2904 constants.RAPI_CERT_FILE,
2905 constants.RAPI_USERS_FILE,
2906 constants.CONFD_HMAC_KEY,
2907 constants.CLUSTER_DOMAIN_SECRET_FILE,
2910 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2911 for hv_name in enabled_hypervisors:
2912 hv_class = hypervisor.GetHypervisor(hv_name)
2913 dist_files.update(hv_class.GetAncillaryFiles())
2915 # 3. Perform the files upload
2916 for fname in dist_files:
2917 if os.path.exists(fname):
2918 result = lu.rpc.call_upload_file(dist_nodes, fname)
2919 for to_node, to_result in result.items():
2920 msg = to_result.fail_msg
2922 msg = ("Copy of file %s to node %s failed: %s" %
2923 (fname, to_node, msg))
2924 lu.proc.LogWarning(msg)
2927 class LURedistributeConfig(NoHooksLU):
2928 """Force the redistribution of cluster configuration.
2930 This is a very simple LU.
2935 def ExpandNames(self):
2936 self.needed_locks = {
2937 locking.LEVEL_NODE: locking.ALL_SET,
2939 self.share_locks[locking.LEVEL_NODE] = 1
2941 def Exec(self, feedback_fn):
2942 """Redistribute the configuration.
2945 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2946 _RedistributeAncillaryFiles(self)
2949 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2950 """Sleep and poll for an instance's disk to sync.
2953 if not instance.disks or disks is not None and not disks:
2954 return True
2956 disks = _ExpandCheckDisks(instance, disks)
2959 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2961 node = instance.primary_node
2963 for dev in disks:
2964 lu.cfg.SetDiskID(dev, node)
2966 # TODO: Convert to utils.Retry
2968 retries = 0
2969 degr_retries = 10 # in seconds, as we sleep 1 second each time
2970 while True:
2971 max_time = 0
2972 done = True
2973 cumul_degraded = False
2974 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2975 msg = rstats.fail_msg
2976 if msg:
2977 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2978 retries += 1
2979 if retries >= 10:
2980 raise errors.RemoteError("Can't contact node %s for mirror data,"
2981 " aborting." % node)
2982 time.sleep(6)
2983 continue
2984 rstats = rstats.payload
2985 retries = 0
2986 for i, mstat in enumerate(rstats):
2987 if mstat is None:
2988 lu.LogWarning("Can't compute data for node %s/%s",
2989 node, disks[i].iv_name)
2990 continue
2992 cumul_degraded = (cumul_degraded or
2993 (mstat.is_degraded and mstat.sync_percent is None))
2994 if mstat.sync_percent is not None:
2995 done = False
2996 if mstat.estimated_time is not None:
2997 rem_time = ("%s remaining (estimated)" %
2998 utils.FormatSeconds(mstat.estimated_time))
2999 max_time = mstat.estimated_time
3000 else:
3001 rem_time = "no time estimate"
3002 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3003 (disks[i].iv_name, mstat.sync_percent, rem_time))
3005 # if we're done but degraded, let's do a few small retries, to
3006 # make sure we see a stable and not transient situation; therefore
3007 # we force restart of the loop
3008 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3009 logging.info("Degraded disks found, %d retries left", degr_retries)
3010 degr_retries -= 1
3011 time.sleep(1)
3012 continue
3014 if done or oneshot:
3015 break
3017 time.sleep(min(60, max_time))
3020 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3021 return not cumul_degraded
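# Note (added): the degraded-retry logic above terminates because every
# "done but degraded" pass decrements degr_retries, so a transiently
# degraded mirror gets at most ten one-second re-checks before the loop
# accepts the degraded state and exits.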
3024 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3025 """Check that mirrors are not degraded.
3027 The ldisk parameter, if True, will change the test from the
3028 is_degraded attribute (which represents overall non-ok status for
3029 the device(s)) to the ldisk (representing the local storage status).
3032 lu.cfg.SetDiskID(dev, node)
3034 result = True
3036 if on_primary or dev.AssembleOnSecondary():
3037 rstats = lu.rpc.call_blockdev_find(node, dev)
3038 msg = rstats.fail_msg
3040 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3041 result = False
3042 elif not rstats.payload:
3043 lu.LogWarning("Can't find disk on node %s", node)
3044 result = False
3045 else:
3046 if ldisk:
3047 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3048 else:
3049 result = result and not rstats.payload.is_degraded
3051 if dev.children:
3052 for child in dev.children:
3053 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3055 return result
3058 class LUDiagnoseOS(NoHooksLU):
3059 """Logical unit for OS diagnose/query.
3064 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3067 _FIELDS_STATIC = utils.FieldSet()
3068 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
3069 "parameters", "api_versions")
3071 def CheckArguments(self):
3072 if self.op.names:
3073 raise errors.OpPrereqError("Selective OS query not supported",
3076 _CheckOutputFields(static=self._FIELDS_STATIC,
3077 dynamic=self._FIELDS_DYNAMIC,
3078 selected=self.op.output_fields)
3080 def ExpandNames(self):
3081 # Lock all nodes, in shared mode
3082 # Temporary removal of locks, should be reverted later
3083 # TODO: reintroduce locks when they are lighter-weight
3084 self.needed_locks = {}
3085 #self.share_locks[locking.LEVEL_NODE] = 1
3086 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3089 def _DiagnoseByOS(rlist):
3090 """Remaps a per-node return list into an a per-os per-node dictionary
3092 @param rlist: a map with node names as keys and OS objects as values
3095 @return: a dictionary with osnames as keys and as value another
3096 map, with nodes as keys and tuples of (path, status, diagnose,
3097 variants, parameters, api_versions) as values, eg::
3099 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3100 (/srv/..., False, "invalid api")],
3101 "node2": [(/srv/..., True, "", [], [])]}
3102 }
3105 all_os = {}
3106 # we build here the list of nodes that didn't fail the RPC (at RPC
3107 # level), so that nodes with a non-responding node daemon don't
3108 # make all OSes invalid
3109 good_nodes = [node_name for node_name in rlist
3110 if not rlist[node_name].fail_msg]
3111 for node_name, nr in rlist.items():
3112 if nr.fail_msg or not nr.payload:
3113 continue
3114 for (name, path, status, diagnose, variants,
3115 params, api_versions) in nr.payload:
3116 if name not in all_os:
3117 # build a list of nodes for this os containing empty lists
3118 # for each node in node_list
3119 all_os[name] = {}
3120 for nname in good_nodes:
3121 all_os[name][nname] = []
3122 # convert params from [name, help] to (name, help)
3123 params = [tuple(v) for v in params]
3124 all_os[name][node_name].append((path, status, diagnose,
3125 variants, params, api_versions))
3126 return all_os
3128 def Exec(self, feedback_fn):
3129 """Compute the list of OSes.
3132 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3133 node_data = self.rpc.call_os_diagnose(valid_nodes)
3134 pol = self._DiagnoseByOS(node_data)
3135 output = []
3137 for os_name, os_data in pol.items():
3138 row = []
3139 valid = True
3140 (variants, params, api_versions) = null_state = (set(), set(), set())
3141 for idx, osl in enumerate(os_data.values()):
3142 valid = bool(valid and osl and osl[0][1])
3143 if not valid:
3144 (variants, params, api_versions) = null_state
3145 break
3146 node_variants, node_params, node_api = osl[0][3:6]
3147 if idx == 0: # first entry
3148 variants = set(node_variants)
3149 params = set(node_params)
3150 api_versions = set(node_api)
3151 else: # keep consistency
3152 variants.intersection_update(node_variants)
3153 params.intersection_update(node_params)
3154 api_versions.intersection_update(node_api)
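# Illustrative sketch (added; variant names are hypothetical): the
# cross-node consistency pass keeps only what every node agrees on:
#
#   >>> variants = set(["default", "testing"])
#   >>> variants.intersection_update(["default"])
#   >>> sorted(variants)
#   ['default']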
3156 for field in self.op.output_fields:
3157 if field == "name":
3158 val = os_name
3159 elif field == "valid":
3161 elif field == "node_status":
3162 # this is just a copy of the dict
3163 val = {}
3164 for node_name, nos_list in os_data.items():
3165 val[node_name] = nos_list
3166 elif field == "variants":
3167 val = list(variants)
3168 elif field == "parameters":
3169 val = list(params)
3170 elif field == "api_versions":
3171 val = list(api_versions)
3172 else:
3173 raise errors.ParameterError(field)
3174 row.append(val)
3176 output.append(row)
3178 return output
3180 class LURemoveNode(LogicalUnit):
3181 """Logical unit for removing a node.
3184 HPATH = "node-remove"
3185 HTYPE = constants.HTYPE_NODE
3186 _OP_PARAMS = [
3187 ("node_name", _NoDefault, _TNonEmptyString),
3188 ]
3190 def BuildHooksEnv(self):
3193 This doesn't run on the target node in the pre phase as a failed
3194 node would then be impossible to remove.
3198 "OP_TARGET": self.op.node_name,
3199 "NODE_NAME": self.op.node_name,
3200 }
3201 all_nodes = self.cfg.GetNodeList()
3202 try:
3203 all_nodes.remove(self.op.node_name)
3204 except ValueError:
3205 logging.warning("Node %s which is about to be removed not found"
3206 " in the all nodes list", self.op.node_name)
3207 return env, all_nodes, all_nodes
3209 def CheckPrereq(self):
3210 """Check prerequisites.
3213 - the node exists in the configuration
3214 - it does not have primary or secondary instances
3215 - it's not the master
3217 Any errors are signaled by raising errors.OpPrereqError.
3220 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3221 node = self.cfg.GetNodeInfo(self.op.node_name)
3222 assert node is not None
3224 instance_list = self.cfg.GetInstanceList()
3226 masternode = self.cfg.GetMasterNode()
3227 if node.name == masternode:
3228 raise errors.OpPrereqError("Node is the master node,"
3229 " you need to failover first.",
3232 for instance_name in instance_list:
3233 instance = self.cfg.GetInstanceInfo(instance_name)
3234 if node.name in instance.all_nodes:
3235 raise errors.OpPrereqError("Instance %s is still running on the node,"
3236 " please remove first." % instance_name,
3238 self.op.node_name = node.name
3239 self.node = node
3241 def Exec(self, feedback_fn):
3242 """Removes the node from the cluster.
3245 node = self.node
3246 logging.info("Stopping the node daemon and removing configs from node %s",
3247 node.name)
3249 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3251 # Promote nodes to master candidate as needed
3252 _AdjustCandidatePool(self, exceptions=[node.name])
3253 self.context.RemoveNode(node.name)
3255 # Run post hooks on the node before it's removed
3256 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3257 try:
3258 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3259 except:
3260 # pylint: disable-msg=W0702
3261 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3263 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3264 msg = result.fail_msg
3265 if msg:
3266 self.LogWarning("Errors encountered on the remote node while leaving"
3267 " the cluster: %s", msg)
3269 # Remove node from our /etc/hosts
3270 if self.cfg.GetClusterInfo().modify_etc_hosts:
3271 # FIXME: this should be done via an rpc call to node daemon
3272 utils.RemoveHostFromEtcHosts(node.name)
3273 _RedistributeAncillaryFiles(self)
3276 class LUQueryNodes(NoHooksLU):
3277 """Logical unit for querying nodes.
3280 # pylint: disable-msg=W0142
3283 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3284 ("use_locking", False, _TBool),
3288 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3289 "master_candidate", "offline", "drained"]
3291 _FIELDS_DYNAMIC = utils.FieldSet(
3293 "mtotal", "mnode", "mfree",
3295 "ctotal", "cnodes", "csockets",
3298 _FIELDS_STATIC = utils.FieldSet(*[
3299 "pinst_cnt", "sinst_cnt",
3300 "pinst_list", "sinst_list",
3301 "pip", "sip", "tags",
3303 "role"] + _SIMPLE_FIELDS
3306 def CheckArguments(self):
3307 _CheckOutputFields(static=self._FIELDS_STATIC,
3308 dynamic=self._FIELDS_DYNAMIC,
3309 selected=self.op.output_fields)
3311 def ExpandNames(self):
3312 self.needed_locks = {}
3313 self.share_locks[locking.LEVEL_NODE] = 1
3315 if self.op.names:
3316 self.wanted = _GetWantedNodes(self, self.op.names)
3317 else:
3318 self.wanted = locking.ALL_SET
3320 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3321 self.do_locking = self.do_node_query and self.op.use_locking
3322 if self.do_locking:
3323 # if we don't request only static fields, we need to lock the nodes
3324 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3326 def Exec(self, feedback_fn):
3327 """Computes the list of nodes and their attributes.
3330 all_info = self.cfg.GetAllNodesInfo()
3331 if self.do_locking:
3332 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3333 elif self.wanted != locking.ALL_SET:
3334 nodenames = self.wanted
3335 missing = set(nodenames).difference(all_info.keys())
3336 if missing:
3337 raise errors.OpExecError(
3338 "Some nodes were removed before retrieving their data: %s" % missing)
3339 else:
3340 nodenames = all_info.keys()
3342 nodenames = utils.NiceSort(nodenames)
3343 nodelist = [all_info[name] for name in nodenames]
3345 # begin data gathering
3347 if self.do_node_query:
3348 live_data = {}
3349 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3350 self.cfg.GetHypervisorType())
3351 for name in nodenames:
3352 nodeinfo = node_data[name]
3353 if not nodeinfo.fail_msg and nodeinfo.payload:
3354 nodeinfo = nodeinfo.payload
3355 fn = utils.TryConvert
3357 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3358 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3359 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3360 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3361 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3362 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3363 "bootid": nodeinfo.get('bootid', None),
3364 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3365 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3366 }
3367 else:
3368 live_data[name] = {}
3369 else:
3370 live_data = dict.fromkeys(nodenames, {})
3372 node_to_primary = dict([(name, set()) for name in nodenames])
3373 node_to_secondary = dict([(name, set()) for name in nodenames])
3375 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3376 "sinst_cnt", "sinst_list"))
3377 if inst_fields & frozenset(self.op.output_fields):
3378 inst_data = self.cfg.GetAllInstancesInfo()
3380 for inst in inst_data.values():
3381 if inst.primary_node in node_to_primary:
3382 node_to_primary[inst.primary_node].add(inst.name)
3383 for secnode in inst.secondary_nodes:
3384 if secnode in node_to_secondary:
3385 node_to_secondary[secnode].add(inst.name)
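# Illustrative note (added; names are hypothetical): for an instance
# "inst1" with primary "node1" and secondary "node2", the maps above are
#   node_to_primary == {"node1": set(["inst1"]), "node2": set()}
#   node_to_secondary == {"node1": set(), "node2": set(["inst1"])}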
3387 master_node = self.cfg.GetMasterNode()
3389 # end data gathering
3392 for node in nodelist:
3393 node_output = []
3394 for field in self.op.output_fields:
3395 if field in self._SIMPLE_FIELDS:
3396 val = getattr(node, field)
3397 elif field == "pinst_list":
3398 val = list(node_to_primary[node.name])
3399 elif field == "sinst_list":
3400 val = list(node_to_secondary[node.name])
3401 elif field == "pinst_cnt":
3402 val = len(node_to_primary[node.name])
3403 elif field == "sinst_cnt":
3404 val = len(node_to_secondary[node.name])
3405 elif field == "pip":
3406 val = node.primary_ip
3407 elif field == "sip":
3408 val = node.secondary_ip
3409 elif field == "tags":
3410 val = list(node.GetTags())
3411 elif field == "master":
3412 val = node.name == master_node
3413 elif self._FIELDS_DYNAMIC.Matches(field):
3414 val = live_data[node.name].get(field, None)
3415 elif field == "role":
3416 if node.name == master_node:
3417 val = "master"
3418 elif node.master_candidate:
3419 val = "candidate"
3420 elif node.drained:
3421 val = "drained"
3422 elif node.offline:
3423 val = "offline"
3424 else:
3425 val = "regular"
3426 else:
3427 raise errors.ParameterError(field)
3428 node_output.append(val)
3429 output.append(node_output)
3431 return output
3434 class LUQueryNodeVolumes(NoHooksLU):
3435 """Logical unit for getting volumes on node(s).
3439 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3440 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3443 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3444 _FIELDS_STATIC = utils.FieldSet("node")
3446 def CheckArguments(self):
3447 _CheckOutputFields(static=self._FIELDS_STATIC,
3448 dynamic=self._FIELDS_DYNAMIC,
3449 selected=self.op.output_fields)
3451 def ExpandNames(self):
3452 self.needed_locks = {}
3453 self.share_locks[locking.LEVEL_NODE] = 1
3454 if not self.op.nodes:
3455 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3457 self.needed_locks[locking.LEVEL_NODE] = \
3458 _GetWantedNodes(self, self.op.nodes)
3460 def Exec(self, feedback_fn):
3461 """Computes the list of nodes and their attributes.
3464 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3465 volumes = self.rpc.call_node_volumes(nodenames)
3467 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3468 in self.cfg.GetInstanceList()]
3470 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3473 for node in nodenames:
3474 nresult = volumes[node]
3477 msg = nresult.fail_msg
3478 if msg:
3479 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3480 continue
3482 node_vols = nresult.payload[:]
3483 node_vols.sort(key=lambda vol: vol['dev'])
3485 for vol in node_vols:
3486 node_output = []
3487 for field in self.op.output_fields:
3490 elif field == "phys":
3494 elif field == "name":
3496 elif field == "size":
3497 val = int(float(vol['size']))
3498 elif field == "instance":
3499 for inst in ilist:
3500 if node not in lv_by_node[inst]:
3501 continue
3502 if vol['name'] in lv_by_node[inst][node]:
3503 val = inst.name
3504 break
3505 else:
3506 val = '-'
3507 else:
3508 raise errors.ParameterError(field)
3509 node_output.append(str(val))
3511 output.append(node_output)
3513 return output
3516 class LUQueryNodeStorage(NoHooksLU):
3517 """Logical unit for getting information on storage units on node(s).
3520 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3522 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3523 ("storage_type", _NoDefault, _CheckStorageType),
3524 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3525 ("name", None, _TMaybeString),
3529 def CheckArguments(self):
3530 _CheckOutputFields(static=self._FIELDS_STATIC,
3531 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3532 selected=self.op.output_fields)
3534 def ExpandNames(self):
3535 self.needed_locks = {}
3536 self.share_locks[locking.LEVEL_NODE] = 1
3538 if self.op.nodes:
3539 self.needed_locks[locking.LEVEL_NODE] = \
3540 _GetWantedNodes(self, self.op.nodes)
3541 else:
3542 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3544 def Exec(self, feedback_fn):
3545 """Computes the list of nodes and their attributes.
3548 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3550 # Always get name to sort by
3551 if constants.SF_NAME in self.op.output_fields:
3552 fields = self.op.output_fields[:]
3553 else:
3554 fields = [constants.SF_NAME] + self.op.output_fields
3556 # Never ask for node or type as it's only known to the LU
3557 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3558 while extra in fields:
3559 fields.remove(extra)
3561 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3562 name_idx = field_idx[constants.SF_NAME]
3564 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3565 data = self.rpc.call_storage_list(self.nodes,
3566 self.op.storage_type, st_args,
3567 self.op.name, fields)
3571 for node in utils.NiceSort(self.nodes):
3572 nresult = data[node]
3573 if nresult.offline:
3574 continue
3576 msg = nresult.fail_msg
3577 if msg:
3578 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3579 continue
3581 rows = dict([(row[name_idx], row) for row in nresult.payload])
3583 for name in utils.NiceSort(rows.keys()):
3584 row = rows[name]
3586 out = []
3588 for field in self.op.output_fields:
3589 if field == constants.SF_NODE:
3590 val = node
3591 elif field == constants.SF_TYPE:
3592 val = self.op.storage_type
3593 elif field in field_idx:
3594 val = row[field_idx[field]]
3595 else:
3596 raise errors.ParameterError(field)
3598 out.append(val)
3605 class LUModifyNodeStorage(NoHooksLU):
3606 """Logical unit for modifying a storage volume on a node.
3611 ("storage_type", _NoDefault, _CheckStorageType),
3612 ("name", _NoDefault, _TNonEmptyString),
3613 ("changes", _NoDefault, _TDict),
3617 def CheckArguments(self):
3618 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3620 storage_type = self.op.storage_type
3622 try:
3623 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3624 except KeyError:
3625 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3626 " modified" % storage_type,
3629 diff = set(self.op.changes.keys()) - modifiable
3630 if diff:
3631 raise errors.OpPrereqError("The following fields can not be modified for"
3632 " storage units of type '%s': %r" %
3633 (storage_type, list(diff)),
3636 def ExpandNames(self):
3637 self.needed_locks = {
3638 locking.LEVEL_NODE: self.op.node_name,
3641 def Exec(self, feedback_fn):
3642 """Computes the list of nodes and their attributes.
3645 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3646 result = self.rpc.call_storage_modify(self.op.node_name,
3647 self.op.storage_type, st_args,
3648 self.op.name, self.op.changes)
3649 result.Raise("Failed to modify storage unit '%s' on %s" %
3650 (self.op.name, self.op.node_name))
3653 class LUAddNode(LogicalUnit):
3654 """Logical unit for adding node to the cluster.
3657 HPATH = "node-add"
3658 HTYPE = constants.HTYPE_NODE
3661 ("primary_ip", None, _NoType),
3662 ("secondary_ip", None, _TMaybeString),
3663 ("readd", False, _TBool),
3666 def CheckArguments(self):
3667 # validate/normalize the node name
3668 self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3670 def BuildHooksEnv(self):
3673 This will run on all nodes before, and on all nodes + the new node after.
3677 "OP_TARGET": self.op.node_name,
3678 "NODE_NAME": self.op.node_name,
3679 "NODE_PIP": self.op.primary_ip,
3680 "NODE_SIP": self.op.secondary_ip,
3681 }
3682 nodes_0 = self.cfg.GetNodeList()
3683 nodes_1 = nodes_0 + [self.op.node_name, ]
3684 return env, nodes_0, nodes_1
3686 def CheckPrereq(self):
3687 """Check prerequisites.
3690 - the new node is not already in the config
3692 - its parameters (single/dual homed) match the cluster
3694 Any errors are signaled by raising errors.OpPrereqError.
3697 node_name = self.op.node_name
3698 cfg = self.cfg
3700 dns_data = netutils.GetHostInfo(node_name)
3702 node = dns_data.name
3703 primary_ip = self.op.primary_ip = dns_data.ip
3704 if self.op.secondary_ip is None:
3705 self.op.secondary_ip = primary_ip
3706 if not netutils.IsValidIP4(self.op.secondary_ip):
3707 raise errors.OpPrereqError("Invalid secondary IP given",
3709 secondary_ip = self.op.secondary_ip
3711 node_list = cfg.GetNodeList()
3712 if not self.op.readd and node in node_list:
3713 raise errors.OpPrereqError("Node %s is already in the configuration" %
3714 node, errors.ECODE_EXISTS)
3715 elif self.op.readd and node not in node_list:
3716 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3719 self.changed_primary_ip = False
3721 for existing_node_name in node_list:
3722 existing_node = cfg.GetNodeInfo(existing_node_name)
3724 if self.op.readd and node == existing_node_name:
3725 if existing_node.secondary_ip != secondary_ip:
3726 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3727 " address configuration as before",
3729 if existing_node.primary_ip != primary_ip:
3730 self.changed_primary_ip = True
3732 continue
3734 if (existing_node.primary_ip == primary_ip or
3735 existing_node.secondary_ip == primary_ip or
3736 existing_node.primary_ip == secondary_ip or
3737 existing_node.secondary_ip == secondary_ip):
3738 raise errors.OpPrereqError("New node ip address(es) conflict with"
3739 " existing node %s" % existing_node.name,
3740 errors.ECODE_NOTUNIQUE)
3742 # check that the type of the node (single versus dual homed) is the
3743 # same as for the master
3744 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3745 master_singlehomed = myself.secondary_ip == myself.primary_ip
3746 newbie_singlehomed = secondary_ip == primary_ip
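# Note (added; the example address is hypothetical): "single-homed" means
# the node's secondary IP equals its primary IP, e.g.
# primary_ip == secondary_ip == "192.0.2.10"; the check below requires the
# new node to use the same homing mode as the master.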
3747 if master_singlehomed != newbie_singlehomed:
3748 if master_singlehomed:
3749 raise errors.OpPrereqError("The master has no private ip but the"
3750 " new node has one",
3753 raise errors.OpPrereqError("The master has a private ip but the"
3754 " new node doesn't have one",
3757 # checks reachability
3758 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3759 raise errors.OpPrereqError("Node not reachable by ping",
3760 errors.ECODE_ENVIRON)
3762 if not newbie_singlehomed:
3763 # check reachability from my secondary ip to newbie's secondary ip
3764 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3765 source=myself.secondary_ip):
3766 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3767 " based ping to noded port",
3768 errors.ECODE_ENVIRON)
3770 if self.op.readd:
3771 exceptions = [node]
3772 else:
3773 exceptions = []
3775 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3777 if self.op.readd:
3778 self.new_node = self.cfg.GetNodeInfo(node)
3779 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3780 else:
3781 self.new_node = objects.Node(name=node,
3782 primary_ip=primary_ip,
3783 secondary_ip=secondary_ip,
3784 master_candidate=self.master_candidate,
3785 offline=False, drained=False)
3787 def Exec(self, feedback_fn):
3788 """Adds the new node to the cluster.
3791 new_node = self.new_node
3792 node = new_node.name
3794 # for re-adds, reset the offline/drained/master-candidate flags;
3795 # we need to reset here, otherwise offline would prevent RPC calls
3796 # later in the procedure; this also means that if the re-add
3797 # fails, we are left with a non-offlined, broken node
3798 if self.op.readd:
3799 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3800 self.LogInfo("Readding a node, the offline/drained flags were reset")
3801 # if we demote the node, we do cleanup later in the procedure
3802 new_node.master_candidate = self.master_candidate
3803 if self.changed_primary_ip:
3804 new_node.primary_ip = self.op.primary_ip
3806 # notify the user about any possible mc promotion
3807 if new_node.master_candidate:
3808 self.LogInfo("Node will be a master candidate")
3810 # check connectivity
3811 result = self.rpc.call_version([node])[node]
3812 result.Raise("Can't get version information from node %s" % node)
3813 if constants.PROTOCOL_VERSION == result.payload:
3814 logging.info("Communication to node %s fine, sw version %s match",
3815 node, result.payload)
3817 raise errors.OpExecError("Version mismatch master version %s,"
3818 " node version %s" %
3819 (constants.PROTOCOL_VERSION, result.payload))
3822 if self.cfg.GetClusterInfo().modify_ssh_setup:
3823 logging.info("Copy ssh key to node %s", node)
3824 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3826 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3827 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3828 priv_key, pub_key]
3829 keyarray = []
3830 for i in keyfiles:
3831 keyarray.append(utils.ReadFile(i))
3833 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3834 keyarray[2], keyarray[3], keyarray[4],
3835 keyarray[5])
3836 result.Raise("Cannot transfer ssh keys to the new node")
3838 # Add node to our /etc/hosts, and add key to known_hosts
3839 if self.cfg.GetClusterInfo().modify_etc_hosts:
3840 # FIXME: this should be done via an rpc call to node daemon
3841 utils.AddHostToEtcHosts(new_node.name)
3843 if new_node.secondary_ip != new_node.primary_ip:
3844 result = self.rpc.call_node_has_ip_address(new_node.name,
3845 new_node.secondary_ip)
3846 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3847 prereq=True, ecode=errors.ECODE_ENVIRON)
3848 if not result.payload:
3849 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3850 " you gave (%s). Please fix and re-run this"
3851 " command." % new_node.secondary_ip)
3853 node_verify_list = [self.cfg.GetMasterNode()]
3854 node_verify_param = {
3855 constants.NV_NODELIST: [node],
3856 # TODO: do a node-net-test as well?
3857 }
3859 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3860 self.cfg.GetClusterName())
3861 for verifier in node_verify_list:
3862 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3863 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3864 if nl_payload:
3865 for failed in nl_payload:
3866 feedback_fn("ssh/hostname verification failed"
3867 " (checking from %s): %s" %
3868 (verifier, nl_payload[failed]))
3869 raise errors.OpExecError("ssh/hostname verification failed.")
3871 if self.op.readd:
3872 _RedistributeAncillaryFiles(self)
3873 self.context.ReaddNode(new_node)
3874 # make sure we redistribute the config
3875 self.cfg.Update(new_node, feedback_fn)
3876 # and make sure the new node will not have old files around
3877 if not new_node.master_candidate:
3878 result = self.rpc.call_node_demote_from_mc(new_node.name)
3879 msg = result.fail_msg
3881 self.LogWarning("Node failed to demote itself from master"
3882 " candidate status: %s" % msg)
3884 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3885 self.context.AddNode(new_node, self.proc.GetECId())
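# Illustrative note (not part of the original module): the branch above is
# what distinguishes a plain add from a re-add at the CLI level, assuming
# the standard gnt-node wiring:
#
#   gnt-node add node2.example.com           # new node: AddNode + full setup
#   gnt-node add --readd node2.example.com   # known node: flags reset, ReaddNode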
3888 class LUSetNodeParams(LogicalUnit):
3889 """Modifies the parameters of a node.
3892 HPATH = "node-modify"
3893 HTYPE = constants.HTYPE_NODE
3894 _OP_PARAMS = [
3895   _PNodeName,
3896   ("master_candidate", None, _TMaybeBool),
3897   ("offline", None, _TMaybeBool),
3898   ("drained", None, _TMaybeBool),
3899   ("auto_promote", False, _TBool),
3900   ]
3901 REQ_BGL = False
3904 def CheckArguments(self):
3905 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3906 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3907 if all_mods.count(None) == 3:
3908   raise errors.OpPrereqError("Please pass at least one modification",
3909                              errors.ECODE_INVAL)
3910 if all_mods.count(True) > 1:
3911   raise errors.OpPrereqError("Can't set the node into more than one"
3912                              " state at the same time",
3913                              errors.ECODE_INVAL)
3915 # Boolean value that tells us whether we're offlining or draining the node
3916 self.offline_or_drain = (self.op.offline == True or
3917 self.op.drained == True)
3918 self.deoffline_or_drain = (self.op.offline == False or
3919 self.op.drained == False)
3920 self.might_demote = (self.op.master_candidate == False or
3921 self.offline_or_drain)
3923 self.lock_all = self.op.auto_promote and self.might_demote
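# A sketch of how the flags computed above interact (illustrative only;
# the opcode name follows the usual OpSetNodeParams convention and is an
# assumption, not quoted from opcodes.py):
#
#   op = opcodes.OpSetNodeParams(node_name="node3", drained=True,
#                                auto_promote=True)
#   # offline_or_drain   -> True  (drained == True)
#   # deoffline_or_drain -> False
#   # might_demote       -> True, so with auto_promote we take lock_all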
3926 def ExpandNames(self):
3927 if self.lock_all:
3928   self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3929 else:
3930   self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3932 def BuildHooksEnv(self):
3933   """Build hooks env.
3935   This runs on the master node.
3937   """
3938   env = {
3939     "OP_TARGET": self.op.node_name,
3940     "MASTER_CANDIDATE": str(self.op.master_candidate),
3941     "OFFLINE": str(self.op.offline),
3942     "DRAINED": str(self.op.drained),
3943     }
3944   nl = [self.cfg.GetMasterNode(),
3945         self.op.node_name]
3946   return env, nl, nl
3948 def CheckPrereq(self):
3949 """Check prerequisites.
3951 This only checks the instance list against the existing names.
3954 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3956 if (self.op.master_candidate is not None or
3957 self.op.drained is not None or
3958 self.op.offline is not None):
3959 # we can't change the master's node flags
3960 if self.op.node_name == self.cfg.GetMasterNode():
3961 raise errors.OpPrereqError("The master role can be changed"
3962 " only via masterfailover",
3966 if node.master_candidate and self.might_demote and not self.lock_all:
3967 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3968 # check if after removing the current node, we're missing master
3969 # candidates
3970 (mc_remaining, mc_should, _) = \
3971     self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3972 if mc_remaining < mc_should:
3973   raise errors.OpPrereqError("Not enough master candidates, please"
3974                              " pass auto_promote to allow promotion",
3975                              errors.ECODE_STATE)
3977 if (self.op.master_candidate == True and
3978     ((node.offline and not self.op.offline == False) or
3979      (node.drained and not self.op.drained == False))):
3980   raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3981                              " to master_candidate" % node.name,
3982                              errors.ECODE_INVAL)
3984 # If we're being deofflined/drained, we'll MC ourself if needed
3985 if (self.deoffline_or_drain and not self.offline_or_drain and not
3986 self.op.master_candidate == True and not node.master_candidate):
3987 self.op.master_candidate = _DecideSelfPromotion(self)
3988 if self.op.master_candidate:
3989 self.LogInfo("Autopromoting node to master candidate")
3993 def Exec(self, feedback_fn):
3994   """Modifies a node.
3996   """
3997   node = self.node
3999   result = []
4002 if self.op.offline is not None:
4003 node.offline = self.op.offline
4004 result.append(("offline", str(self.op.offline)))
4005 if self.op.offline == True:
4006   if node.master_candidate:
4007     node.master_candidate = False
4009     result.append(("master_candidate", "auto-demotion due to offline"))
4010   if node.drained:
4011     node.drained = False
4012     result.append(("drained", "clear drained status due to offline"))
4014 if self.op.master_candidate is not None:
4015 node.master_candidate = self.op.master_candidate
4017 result.append(("master_candidate", str(self.op.master_candidate)))
4018 if self.op.master_candidate == False:
4019 rrc = self.rpc.call_node_demote_from_mc(node.name)
4020 msg = rrc.fail_msg
4021 if msg:
4022   self.LogWarning("Node failed to demote itself: %s" % msg)
4024 if self.op.drained is not None:
4025 node.drained = self.op.drained
4026 result.append(("drained", str(self.op.drained)))
4027 if self.op.drained == True:
4028 if node.master_candidate:
4029 node.master_candidate = False
4031 result.append(("master_candidate", "auto-demotion due to drain"))
4032 rrc = self.rpc.call_node_demote_from_mc(node.name)
4033 msg = rrc.fail_msg
4034 if msg:
4035   self.LogWarning("Node failed to demote itself: %s" % msg)
4036 if node.offline:
4037   node.offline = False
4038   result.append(("offline", "clear offline status due to drain"))
4040 # we locked all nodes, we adjust the CP before updating this node
4041 if self.lock_all:
4042   _AdjustCandidatePool(self, [node.name])
4044 # this will trigger configuration file update, if needed
4045 self.cfg.Update(node, feedback_fn)
4047 # this will trigger job queue propagation or cleanup
4049 self.context.ReaddNode(node)
4051 return result
4054 class LUPowercycleNode(NoHooksLU):
4055 """Powercycles a node.
4057 """
4059 _OP_PARAMS = [
4060   _PNodeName,
4061   _PForce,
4062   ]
4063 REQ_BGL = False
4064 def CheckArguments(self):
4065 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4066 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4067 raise errors.OpPrereqError("The node is the master and the force"
4068                            " parameter was not set",
4069                            errors.ECODE_INVAL)
4071 def ExpandNames(self):
4072 """Locking for PowercycleNode.
4074 This is a last-resort option and shouldn't block on other
4075 jobs. Therefore, we grab no locks.
4078 self.needed_locks = {}
4080 def Exec(self, feedback_fn):
4084 result = self.rpc.call_node_powercycle(self.op.node_name,
4085 self.cfg.GetHypervisorType())
4086 result.Raise("Failed to schedule the reboot")
4087 return result.payload
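# Illustrative usage sketch (assumed client-side wiring, not part of this
# module; the opcode name OpPowercycleNode is an assumption): powercycling
# the master requires the force flag, per the CheckArguments test above.
#
#   op = opcodes.OpPowercycleNode(node_name="master.example.com", force=True)
#   # the payload returned by Exec is the node daemon's confirmation that
#   # the reboot was scheduled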
4090 class LUQueryClusterInfo(NoHooksLU):
4091 """Query cluster configuration.
4096 def ExpandNames(self):
4097 self.needed_locks = {}
4099 def Exec(self, feedback_fn):
4100 """Return cluster config.
4103 cluster = self.cfg.GetClusterInfo()
4104 os_hvp = {}
4106 # Filter just for enabled hypervisors
4107 for os_name, hv_dict in cluster.os_hvp.items():
4108 os_hvp[os_name] = {}
4109 for hv_name, hv_params in hv_dict.items():
4110 if hv_name in cluster.enabled_hypervisors:
4111 os_hvp[os_name][hv_name] = hv_params
4113 result = {
4114   "software_version": constants.RELEASE_VERSION,
4115 "protocol_version": constants.PROTOCOL_VERSION,
4116 "config_version": constants.CONFIG_VERSION,
4117 "os_api_version": max(constants.OS_API_VERSIONS),
4118 "export_version": constants.EXPORT_VERSION,
4119 "architecture": (platform.architecture()[0], platform.machine()),
4120 "name": cluster.cluster_name,
4121 "master": cluster.master_node,
4122 "default_hypervisor": cluster.enabled_hypervisors[0],
4123 "enabled_hypervisors": cluster.enabled_hypervisors,
4124 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4125                   for hypervisor_name in cluster.enabled_hypervisors]),
4126 "os_hvp": os_hvp,
4127 "beparams": cluster.beparams,
4128 "osparams": cluster.osparams,
4129 "nicparams": cluster.nicparams,
4130 "candidate_pool_size": cluster.candidate_pool_size,
4131 "master_netdev": cluster.master_netdev,
4132 "volume_group_name": cluster.volume_group_name,
4133 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4134 "file_storage_dir": cluster.file_storage_dir,
4135 "maintain_node_health": cluster.maintain_node_health,
4136 "ctime": cluster.ctime,
4137 "mtime": cluster.mtime,
4138 "uuid": cluster.uuid,
4139 "tags": list(cluster.GetTags()),
4140 "uid_pool": cluster.uid_pool,
4141 "default_iallocator": cluster.default_iallocator,
4142 }
4144 return result
4147 class LUQueryConfigValues(NoHooksLU):
4148 """Return configuration values.
4151 _OP_PARAMS = [_POutputFields]
4152 REQ_BGL = False
4153 _FIELDS_DYNAMIC = utils.FieldSet()
4154 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4155                                 "watcher_pause")
4157 def CheckArguments(self):
4158 _CheckOutputFields(static=self._FIELDS_STATIC,
4159 dynamic=self._FIELDS_DYNAMIC,
4160 selected=self.op.output_fields)
4162 def ExpandNames(self):
4163 self.needed_locks = {}
4165 def Exec(self, feedback_fn):
4166   """Dump a representation of the cluster config to the standard output.
4168   """
4169   values = []
4170   for field in self.op.output_fields:
4171 if field == "cluster_name":
4172 entry = self.cfg.GetClusterName()
4173 elif field == "master_node":
4174 entry = self.cfg.GetMasterNode()
4175 elif field == "drain_flag":
4176 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4177 elif field == "watcher_pause":
4178 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4179 else:
4180   raise errors.ParameterError(field)
4181 values.append(entry)
4182 return values
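# Example of the field -> value mapping implemented above (illustrative
# values only):
#
#   output_fields = ["cluster_name", "drain_flag"]
#   # -> ["cluster.example.com", False]
#
# Fields come back in the order requested; unknown fields raise
# errors.ParameterError.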
4185 class LUActivateInstanceDisks(NoHooksLU):
4186 """Bring up an instance's disks.
4188 """
4189 _OP_PARAMS = [
4190   _PInstanceName,
4191   ("ignore_size", False, _TBool),
4192   ]
4193 REQ_BGL = False
4195 def ExpandNames(self):
4196 self._ExpandAndLockInstance()
4197 self.needed_locks[locking.LEVEL_NODE] = []
4198 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4200 def DeclareLocks(self, level):
4201 if level == locking.LEVEL_NODE:
4202 self._LockInstancesNodes()
4204 def CheckPrereq(self):
4205 """Check prerequisites.
4207 This checks that the instance is in the cluster.
4210 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4211 assert self.instance is not None, \
4212 "Cannot retrieve locked instance %s" % self.op.instance_name
4213 _CheckNodeOnline(self, self.instance.primary_node)
4215 def Exec(self, feedback_fn):
4216 """Activate the disks.
4219 disks_ok, disks_info = \
4220 _AssembleInstanceDisks(self, self.instance,
4221 ignore_size=self.op.ignore_size)
4222 if not disks_ok:
4223   raise errors.OpExecError("Cannot activate block devices")
4225 return disks_info
4228 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4229                            ignore_size=False):
4230 """Prepare the block devices for an instance.
4232 This sets up the block devices on all nodes.
4234 @type lu: L{LogicalUnit}
4235 @param lu: the logical unit on whose behalf we execute
4236 @type instance: L{objects.Instance}
4237 @param instance: the instance for whose disks we assemble
4238 @type disks: list of L{objects.Disk} or None
4239 @param disks: which disks to assemble (or all, if None)
4240 @type ignore_secondaries: boolean
4241 @param ignore_secondaries: if true, errors on secondary nodes
4242 won't result in an error return from the function
4243 @type ignore_size: boolean
4244 @param ignore_size: if true, the current known size of the disk
4245 will not be used during the disk activation, useful for cases
4246 when the size is wrong
4247 @return: False if the operation failed, otherwise a list of
4248 (host, instance_visible_name, node_visible_name)
4249 with the mapping from node devices to instance devices
4251 device_info = []
4252 disks_ok = True
4254 iname = instance.name
4255 disks = _ExpandCheckDisks(instance, disks)
4257 # With the two passes mechanism we try to reduce the window of
4258 # opportunity for the race condition of switching DRBD to primary
4259 # before handshaking occurred, but we do not eliminate it
4261 # The proper fix would be to wait (with some limits) until the
4262 # connection has been made and drbd transitions from WFConnection
4263 # into any other network-connected state (Connected, SyncTarget,
4264 # SyncSource, etc.)
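# A bounded-wait sketch of the "proper fix" described above (illustrative
# only: call_drbd_wait_connect is a hypothetical RPC, not one this module
# defines; the WFConnection name matches the DRBD state referred to):
#
#   def _WaitForDrbdHandshake(lu, nodes, disks, timeout=60):
#     deadline = time.time() + timeout
#     while time.time() < deadline:
#       result = lu.rpc.call_drbd_wait_connect(nodes, disks)  # hypothetical
#       if all(not nres.fail_msg and nres.payload != "WFConnection"
#              for nres in result.values()):
#         return True   # handshake done on all nodes
#       time.sleep(1)
#     return False      # caller falls back to the two-pass behaviour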
4266 # 1st pass, assemble on all nodes in secondary mode
4267 for inst_disk in disks:
4268 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4269   if ignore_size:
4270     node_disk = node_disk.Copy()
4271     node_disk.UnsetSize()
4272   lu.cfg.SetDiskID(node_disk, node)
4273   result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4274   msg = result.fail_msg
4275   if msg:
4276     lu.proc.LogWarning("Could not prepare block device %s on node %s"
4277                        " (is_primary=False, pass=1): %s",
4278                        inst_disk.iv_name, node, msg)
4279     if not ignore_secondaries:
4280       disks_ok = False
4282 # FIXME: race condition on drbd migration to primary
4284 # 2nd pass, do only the primary node
4285 for inst_disk in disks:
4286   dev_path = None
4288   for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4289     if node != instance.primary_node:
4290       continue
4291     if ignore_size:
4292       node_disk = node_disk.Copy()
4293       node_disk.UnsetSize()
4294     lu.cfg.SetDiskID(node_disk, node)
4295     result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4296     msg = result.fail_msg
4297     if msg:
4298       lu.proc.LogWarning("Could not prepare block device %s on node %s"
4299                          " (is_primary=True, pass=2): %s",
4300                          inst_disk.iv_name, node, msg)
4301       disks_ok = False
4302     else:
4303       dev_path = result.payload
4305   device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4307 # leave the disks configured for the primary node
4308 # this is a workaround that would be fixed better by
4309 # improving the logical/physical id handling
4310 for disk in disks:
4311   lu.cfg.SetDiskID(disk, instance.primary_node)
4313 return disks_ok, device_info
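# Shape of the return value, as built above (illustrative example):
#
#   disks_ok    -> False as soon as any non-ignored assembly failed
#   device_info -> [(primary_node, iv_name, dev_path), ...], e.g.
#                  [("node1.example.com", "disk/0", "/dev/drbd0")]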
4316 def _StartInstanceDisks(lu, instance, force):
4317 """Start the disks of an instance.
4320 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4321                                      ignore_secondaries=force)
4322 if not disks_ok:
4323   _ShutdownInstanceDisks(lu, instance)
4324   if force is not None and not force:
4325     lu.proc.LogWarning("", hint="If the message above refers to a"
4326                        " secondary node,"
4327                        " you can retry the operation using '--force'.")
4328   raise errors.OpExecError("Disk consistency error")
4331 class LUDeactivateInstanceDisks(NoHooksLU):
4332 """Shutdown an instance's disks.
4334 """
4335 _OP_PARAMS = [
4336   _PInstanceName,
4337   ]
4338 REQ_BGL = False
4340 def ExpandNames(self):
4341 self._ExpandAndLockInstance()
4342 self.needed_locks[locking.LEVEL_NODE] = []
4343 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4345 def DeclareLocks(self, level):
4346 if level == locking.LEVEL_NODE:
4347 self._LockInstancesNodes()
4349 def CheckPrereq(self):
4350 """Check prerequisites.
4352 This checks that the instance is in the cluster.
4355 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4356 assert self.instance is not None, \
4357 "Cannot retrieve locked instance %s" % self.op.instance_name
4359 def Exec(self, feedback_fn):
4360 """Deactivate the disks
4363 instance = self.instance
4364 _SafeShutdownInstanceDisks(self, instance)
4367 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4368 """Shutdown block devices of an instance.
4370 This function checks if an instance is running, before calling
4371 _ShutdownInstanceDisks.
4374 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4375 _ShutdownInstanceDisks(lu, instance, disks=disks)
4378 def _ExpandCheckDisks(instance, disks):
4379 """Return the instance disks selected by the disks list
4381 @type disks: list of L{objects.Disk} or None
4382 @param disks: selected disks
4383 @rtype: list of L{objects.Disk}
4384 @return: selected instance disks to act on
4386 """
4387 if disks is None:
4388   return instance.disks
4389 else:
4390   if not set(disks).issubset(instance.disks):
4391     raise errors.ProgrammerError("Can only act on disks belonging to the"
4392                                  " requested instance")
4393   return disks
4396 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4397 """Shutdown block devices of an instance.
4399 This does the shutdown on all nodes of the instance.
4401 If ignore_primary is false, errors on the primary node are
4402 ignored.
4404 """
4405 all_result = True
4406 disks = _ExpandCheckDisks(instance, disks)
4408 for disk in disks:
4409   for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4410     lu.cfg.SetDiskID(top_disk, node)
4411     result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4412     msg = result.fail_msg
4413     if msg:
4414       lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4415                     disk.iv_name, node, msg)
4416       if not ignore_primary or node != instance.primary_node:
4417         all_result = False
4419 return all_result
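# Hedged usage sketch (names assumed from a surrounding LU): callers treat
# a False result as "some device is still up", e.g.:
#
#   if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
#     raise errors.OpExecError("Can't shut down the instance's disks.")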
4421 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4422 """Checks if a node has enough free memory.
4424 This function checks if a given node has the needed amount of free
4425 memory. In case the node has less memory or we cannot get the
4426 information from the node, this function raises an OpPrereqError
4427 exception.
4429 @type lu: C{LogicalUnit}
4430 @param lu: a logical unit from which we get configuration data
4431 @type node: C{str}
4432 @param node: the node to check
4433 @type reason: C{str}
4434 @param reason: string to use in the error message
4435 @type requested: C{int}
4436 @param requested: the amount of memory in MiB to check for
4437 @type hypervisor_name: C{str}
4438 @param hypervisor_name: the hypervisor to ask for memory stats
4439 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4440 we cannot check the node
4443 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4444 nodeinfo[node].Raise("Can't get data from node %s" % node,
4445 prereq=True, ecode=errors.ECODE_ENVIRON)
4446 free_mem = nodeinfo[node].payload.get('memory_free', None)
4447 if not isinstance(free_mem, int):
4448 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4449 " was '%s'" % (node, free_mem),
4450 errors.ECODE_ENVIRON)
4451 if requested > free_mem:
4452   raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4453                              " needed %s MiB, available %s MiB" %
4454                              (node, reason, requested, free_mem),
4455                              errors.ECODE_NORES)
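# Hedged usage sketch (values illustrative): a caller checking that a
# failover target can hold the instance's memory would do something like:
#
#   _CheckNodeFreeMemory(self, target_node,
#                        "failing over instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)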
4458 def _CheckNodesFreeDisk(lu, nodenames, requested):
4459 """Checks if nodes have enough free disk space in the default VG.
4461 This function checks if all given nodes have the needed amount of
4462 free disk. In case any node has less disk or we cannot get the
4463 information from the node, this function raises an OpPrereqError
4464 exception.
4466 @type lu: C{LogicalUnit}
4467 @param lu: a logical unit from which we get configuration data
4468 @type nodenames: C{list}
4469 @param nodenames: the list of node names to check
4470 @type requested: C{int}
4471 @param requested: the amount of disk in MiB to check for
4472 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4473 we cannot check the node
4476 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4477 lu.cfg.GetHypervisorType())
4478 for node in nodenames:
4479 info = nodeinfo[node]
4480 info.Raise("Cannot get current information from node %s" % node,
4481 prereq=True, ecode=errors.ECODE_ENVIRON)
4482 vg_free = info.payload.get("vg_free", None)
4483 if not isinstance(vg_free, int):
4484 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4485 " result was '%s'" % (node, vg_free),
4486 errors.ECODE_ENVIRON)
4487 if requested > vg_free:
4488   raise errors.OpPrereqError("Not enough disk space on target node %s:"
4489                              " required %d MiB, available %d MiB" %
4490                              (node, requested, vg_free),
4491                              errors.ECODE_NORES)
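# Hedged usage sketch (size illustrative): checking that every node in a
# list can hold 2048 MiB of new volumes in the default VG:
#
#   _CheckNodesFreeDisk(self, [pnode, snode], 2048)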
4494 class LUStartupInstance(LogicalUnit):
4495 """Starts an instance.
4498 HPATH = "instance-start"
4499 HTYPE = constants.HTYPE_INSTANCE
4500 _OP_PARAMS = [
4501   _PInstanceName,
4502   _PForce,
4503   ("hvparams", _EmptyDict, _TDict),
4504   ("beparams", _EmptyDict, _TDict),
4505   ]
4506 REQ_BGL = False
4508 def CheckArguments(self):
4510 if self.op.beparams:
4511 # fill the beparams dict
4512 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4514 def ExpandNames(self):
4515 self._ExpandAndLockInstance()
4517 def BuildHooksEnv(self):
4518   """Build hooks env.
4520   This runs on master, primary and secondary nodes of the instance.
4522   """
4523   env = {
4524     "FORCE": self.op.force,
4525     }
4526   env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4527   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4528   return env, nl, nl
4530 def CheckPrereq(self):
4531 """Check prerequisites.
4533 This checks that the instance is in the cluster.
4536 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4537 assert self.instance is not None, \
4538 "Cannot retrieve locked instance %s" % self.op.instance_name
4541 if self.op.hvparams:
4542 # check hypervisor parameter syntax (locally)
4543 cluster = self.cfg.GetClusterInfo()
4544 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4545 filled_hvp = cluster.FillHV(instance)
4546 filled_hvp.update(self.op.hvparams)
4547 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4548 hv_type.CheckParameterSyntax(filled_hvp)
4549 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4551 _CheckNodeOnline(self, instance.primary_node)
4553 bep = self.cfg.GetClusterInfo().FillBE(instance)
4554 # check bridges existence
4555 _CheckInstanceBridgesExist(self, instance)
4557 remote_info = self.rpc.call_instance_info(instance.primary_node,
4558                                           instance.name,
4559                                           instance.hypervisor)
4560 remote_info.Raise("Error checking node %s" % instance.primary_node,
4561 prereq=True, ecode=errors.ECODE_ENVIRON)
4562 if not remote_info.payload: # not running already
4563 _CheckNodeFreeMemory(self, instance.primary_node,
4564 "starting instance %s" % instance.name,
4565 bep[constants.BE_MEMORY], instance.hypervisor)
4567 def Exec(self, feedback_fn):
4568 """Start the instance.
4571 instance = self.instance
4572 force = self.op.force
4574 self.cfg.MarkInstanceUp(instance.name)
4576 node_current = instance.primary_node
4578 _StartInstanceDisks(self, instance, force)
4580 result = self.rpc.call_instance_start(node_current, instance,
4581 self.op.hvparams, self.op.beparams)
4582 msg = result.fail_msg
4583 if msg:
4584   _ShutdownInstanceDisks(self, instance)
4585   raise errors.OpExecError("Could not start instance: %s" % msg)
4588 class LURebootInstance(LogicalUnit):
4589 """Reboot an instance.
4592 HPATH = "instance-reboot"
4593 HTYPE = constants.HTYPE_INSTANCE
4594 _OP_PARAMS = [
4595   _PInstanceName,
4596   ("ignore_secondaries", False, _TBool),
4597   ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4598   _PShutdownTimeout,
4599   ]
4600 REQ_BGL = False
4602 def ExpandNames(self):
4603 self._ExpandAndLockInstance()
4605 def BuildHooksEnv(self):
4606   """Build hooks env.
4608   This runs on master, primary and secondary nodes of the instance.
4610   """
4611   env = {
4612     "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4613     "REBOOT_TYPE": self.op.reboot_type,
4614     "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4615     }
4616   env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4617   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4618   return env, nl, nl
4620 def CheckPrereq(self):
4621 """Check prerequisites.
4623 This checks that the instance is in the cluster.
4626 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4627 assert self.instance is not None, \
4628 "Cannot retrieve locked instance %s" % self.op.instance_name
4630 _CheckNodeOnline(self, instance.primary_node)
4632 # check bridges existence
4633 _CheckInstanceBridgesExist(self, instance)
4635 def Exec(self, feedback_fn):
4636 """Reboot the instance.
4639 instance = self.instance
4640 ignore_secondaries = self.op.ignore_secondaries
4641 reboot_type = self.op.reboot_type
4643 node_current = instance.primary_node
4645 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4646 constants.INSTANCE_REBOOT_HARD]:
4647 for disk in instance.disks:
4648 self.cfg.SetDiskID(disk, node_current)
4649   result = self.rpc.call_instance_reboot(node_current, instance,
4650                                          reboot_type,
4651                                          self.op.shutdown_timeout)
4652   result.Raise("Could not reboot instance")
4653 else:
4654   result = self.rpc.call_instance_shutdown(node_current, instance,
4655                                            self.op.shutdown_timeout)
4656   result.Raise("Could not shutdown instance for full reboot")
4657 _ShutdownInstanceDisks(self, instance)
4658 _StartInstanceDisks(self, instance, ignore_secondaries)
4659 result = self.rpc.call_instance_start(node_current, instance, None, None)
4660   msg = result.fail_msg
4661   if msg:
4662     _ShutdownInstanceDisks(self, instance)
4663     raise errors.OpExecError("Could not start instance for"
4664 " full reboot: %s" % msg)
4666 self.cfg.MarkInstanceUp(instance.name)
4669 class LUShutdownInstance(LogicalUnit):
4670 """Shutdown an instance.
4673 HPATH = "instance-stop"
4674 HTYPE = constants.HTYPE_INSTANCE
4675 _OP_PARAMS = [
4676   _PInstanceName,
4677   ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4678   ]
4679 REQ_BGL = False
4681 def ExpandNames(self):
4682 self._ExpandAndLockInstance()
4684 def BuildHooksEnv(self):
4685   """Build hooks env.
4687   This runs on master, primary and secondary nodes of the instance.
4689   """
4690   env = _BuildInstanceHookEnvByObject(self, self.instance)
4691   env["TIMEOUT"] = self.op.timeout
4692   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4693   return env, nl, nl
4695 def CheckPrereq(self):
4696 """Check prerequisites.
4698 This checks that the instance is in the cluster.
4701 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4702 assert self.instance is not None, \
4703 "Cannot retrieve locked instance %s" % self.op.instance_name
4704 _CheckNodeOnline(self, self.instance.primary_node)
4706 def Exec(self, feedback_fn):
4707 """Shutdown the instance.
4710 instance = self.instance
4711 node_current = instance.primary_node
4712 timeout = self.op.timeout
4713 self.cfg.MarkInstanceDown(instance.name)
4714 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4715 msg = result.fail_msg
4716 if msg:
4717   self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4719 _ShutdownInstanceDisks(self, instance)
4722 class LUReinstallInstance(LogicalUnit):
4723 """Reinstall an instance.
4726 HPATH = "instance-reinstall"
4727 HTYPE = constants.HTYPE_INSTANCE
4728 _OP_PARAMS = [
4729   _PInstanceName,
4730   ("os_type", None, _TMaybeString),
4731   ("force_variant", False, _TBool),
4732   ]
4733 REQ_BGL = False
4735 def ExpandNames(self):
4736 self._ExpandAndLockInstance()
4738 def BuildHooksEnv(self):
4739   """Build hooks env.
4741   This runs on master, primary and secondary nodes of the instance.
4743   """
4744   env = _BuildInstanceHookEnvByObject(self, self.instance)
4745   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4746   return env, nl, nl
4748 def CheckPrereq(self):
4749 """Check prerequisites.
4751 This checks that the instance is in the cluster and is not running.
4754 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4755 assert instance is not None, \
4756 "Cannot retrieve locked instance %s" % self.op.instance_name
4757 _CheckNodeOnline(self, instance.primary_node)
4759 if instance.disk_template == constants.DT_DISKLESS:
4760 raise errors.OpPrereqError("Instance '%s' has no disks" %
4761 self.op.instance_name,
4763 _CheckInstanceDown(self, instance, "cannot reinstall")
4765 if self.op.os_type is not None:
4767 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4768 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4770 self.instance = instance
4772 def Exec(self, feedback_fn):
4773 """Reinstall the instance.
4776 inst = self.instance
4778 if self.op.os_type is not None:
4779 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4780 inst.os = self.op.os_type
4781 self.cfg.Update(inst, feedback_fn)
4783 _StartInstanceDisks(self, inst, None)
4784 try:
4785   feedback_fn("Running the instance OS create scripts...")
4786   # FIXME: pass debug option from opcode to backend
4787   result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4788                                          self.op.debug_level)
4789   result.Raise("Could not install OS for instance %s on node %s" %
4790                (inst.name, inst.primary_node))
4791 finally:
4792   _ShutdownInstanceDisks(self, inst)
4795 class LURecreateInstanceDisks(LogicalUnit):
4796 """Recreate an instance's missing disks.
4799 HPATH = "instance-recreate-disks"
4800 HTYPE = constants.HTYPE_INSTANCE
4801 _OP_PARAMS = [
4802   _PInstanceName,
4803   ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4804   ]
4805 REQ_BGL = False
4807 def ExpandNames(self):
4808 self._ExpandAndLockInstance()
4810 def BuildHooksEnv(self):
4811   """Build hooks env.
4813   This runs on master, primary and secondary nodes of the instance.
4815   """
4816   env = _BuildInstanceHookEnvByObject(self, self.instance)
4817   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4818   return env, nl, nl
4820 def CheckPrereq(self):
4821 """Check prerequisites.
4823 This checks that the instance is in the cluster and is not running.
4826 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4827 assert instance is not None, \
4828 "Cannot retrieve locked instance %s" % self.op.instance_name
4829 _CheckNodeOnline(self, instance.primary_node)
4831 if instance.disk_template == constants.DT_DISKLESS:
4832 raise errors.OpPrereqError("Instance '%s' has no disks" %
4833 self.op.instance_name, errors.ECODE_INVAL)
4834 _CheckInstanceDown(self, instance, "cannot recreate disks")
4836 if not self.op.disks:
4837 self.op.disks = range(len(instance.disks))
4839 for idx in self.op.disks:
4840 if idx >= len(instance.disks):
4841   raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4842                              errors.ECODE_INVAL)
4844 self.instance = instance
4846 def Exec(self, feedback_fn):
4847   """Recreate the disks.
4849   """
4850   to_skip = []
4851   for idx, _ in enumerate(self.instance.disks):
4852     if idx not in self.op.disks: # disk idx has not been passed in
4853       to_skip.append(idx)
4854       continue
4856   _CreateDisks(self, self.instance, to_skip=to_skip)
4859 class LURenameInstance(LogicalUnit):
4860 """Rename an instance.
4863 HPATH = "instance-rename"
4864 HTYPE = constants.HTYPE_INSTANCE
4865 _OP_PARAMS = [
4866   _PInstanceName,
4867   ("new_name", _NoDefault, _TNonEmptyString),
4868   ("ignore_ip", False, _TBool),
4869   ("check_name", True, _TBool),
4870   ]
4872 def BuildHooksEnv(self):
4873   """Build hooks env.
4875   This runs on master, primary and secondary nodes of the instance.
4877   """
4878   env = _BuildInstanceHookEnvByObject(self, self.instance)
4879   env["INSTANCE_NEW_NAME"] = self.op.new_name
4880   nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4881   return env, nl, nl
4883 def CheckPrereq(self):
4884 """Check prerequisites.
4886 This checks that the instance is in the cluster and is not running.
4889 self.op.instance_name = _ExpandInstanceName(self.cfg,
4890 self.op.instance_name)
4891 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4892 assert instance is not None
4893 _CheckNodeOnline(self, instance.primary_node)
4894 _CheckInstanceDown(self, instance, "cannot rename")
4895 self.instance = instance
4897 # new name verification
4898 if self.op.check_name:
4899 name_info = netutils.GetHostInfo(self.op.new_name)
4900 self.op.new_name = name_info.name
4902 new_name = self.op.new_name
4904 instance_list = self.cfg.GetInstanceList()
4905 if new_name in instance_list:
4906 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4907 new_name, errors.ECODE_EXISTS)
4909 if not self.op.ignore_ip:
4910 if netutils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4911 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4912 (name_info.ip, new_name),
4913 errors.ECODE_NOTUNIQUE)
4915 def Exec(self, feedback_fn):
4916   """Rename the instance.
4918   """
4919 inst = self.instance
4920 old_name = inst.name
4922 if inst.disk_template == constants.DT_FILE:
4923 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4925 self.cfg.RenameInstance(inst.name, self.op.new_name)
4926 # Change the instance lock. This is definitely safe while we hold the BGL
4927 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4928 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4930 # re-read the instance from the configuration after rename
4931 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4933 if inst.disk_template == constants.DT_FILE:
4934 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4935 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4936 old_file_storage_dir,
4937 new_file_storage_dir)
4938 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4939 " (but the instance has been renamed in Ganeti)" %
4940 (inst.primary_node, old_file_storage_dir,
4941 new_file_storage_dir))
4943 _StartInstanceDisks(self, inst, None)
4944 try:
4945   result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4946                                              old_name, self.op.debug_level)
4947   msg = result.fail_msg
4948   if msg:
4949     msg = ("Could not run OS rename script for instance %s on node %s"
4950            " (but the instance has been renamed in Ganeti): %s" %
4951            (inst.name, inst.primary_node, msg))
4952     self.proc.LogWarning(msg)
4953 finally:
4954   _ShutdownInstanceDisks(self, inst)
4956 return inst.name
4957 class LURemoveInstance(LogicalUnit):
4958 """Remove an instance.
4961 HPATH = "instance-remove"
4962 HTYPE = constants.HTYPE_INSTANCE
4963 _OP_PARAMS = [
4964   _PInstanceName,
4965   ("ignore_failures", False, _TBool),
4966   _PShutdownTimeout,
4967   ]
4968 REQ_BGL = False
4970 def ExpandNames(self):
4971 self._ExpandAndLockInstance()
4972 self.needed_locks[locking.LEVEL_NODE] = []
4973 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4975 def DeclareLocks(self, level):
4976 if level == locking.LEVEL_NODE:
4977 self._LockInstancesNodes()
4979 def BuildHooksEnv(self):
4982 This runs on master, primary and secondary nodes of the instance.
4985 env = _BuildInstanceHookEnvByObject(self, self.instance)
4986 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4987 nl = [self.cfg.GetMasterNode()]
4988 nl_post = list(self.instance.all_nodes) + nl
4989 return env, nl, nl_post
4991 def CheckPrereq(self):
4992 """Check prerequisites.
4994 This checks that the instance is in the cluster.
4997 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4998 assert self.instance is not None, \
4999 "Cannot retrieve locked instance %s" % self.op.instance_name
5001 def Exec(self, feedback_fn):
5002 """Remove the instance.
5005 instance = self.instance
5006 logging.info("Shutting down instance %s on node %s",
5007 instance.name, instance.primary_node)
5009 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5010 self.op.shutdown_timeout)
5011 msg = result.fail_msg
5012 if msg:
5013   if self.op.ignore_failures:
5014     feedback_fn("Warning: can't shutdown instance: %s" % msg)
5015   else:
5016     raise errors.OpExecError("Could not shutdown instance %s on"
5017                              " node %s: %s" %
5018                              (instance.name, instance.primary_node, msg))
5020 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5023 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5024 """Utility function to remove an instance.
5027 logging.info("Removing block devices for instance %s", instance.name)
5029 if not _RemoveDisks(lu, instance):
5030 if not ignore_failures:
5031 raise errors.OpExecError("Can't remove instance's disks")
5032 feedback_fn("Warning: can't remove instance's disks")
5034 logging.info("Removing instance %s out of cluster config", instance.name)
5036 lu.cfg.RemoveInstance(instance.name)
5038 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5039 "Instance lock removal conflict"
5041 # Remove lock for the instance
5042 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5045 class LUQueryInstances(NoHooksLU):
5046 """Logical unit for querying instances.
5049 # pylint: disable-msg=W0142
5050 _OP_PARAMS = [
5051   ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5052   ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5053   ("use_locking", False, _TBool),
5054   ]
5055 REQ_BGL = False
5056 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5057 "serial_no", "ctime", "mtime", "uuid"]
5058 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5059                                   "admin_state",
5060                                   "disk_template", "ip", "mac", "bridge",
5061                                   "nic_mode", "nic_link",
5062                                   "sda_size", "sdb_size", "vcpus", "tags",
5063                                   "network_port", "beparams",
5064                                   r"(disk)\.(size)/([0-9]+)",
5065                                   r"(disk)\.(sizes)", "disk_usage",
5066                                   r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5067                                   r"(nic)\.(bridge)/([0-9]+)",
5068                                   r"(nic)\.(macs|ips|modes|links|bridges)",
5069                                   r"(disk|nic)\.(count)",
5070                                   "hvparams",
5071                                   ] + _SIMPLE_FIELDS +
5072                                  ["hv/%s" % name
5073                                   for name in constants.HVS_PARAMETERS
5074                                   if name not in constants.HVC_GLOBALS] +
5075                                  ["be/%s" % name
5076                                   for name in constants.BES_PARAMETERS])
5077 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5078                                  "oper_ram",
5079                                  "oper_vcpus",
5080                                  "status")
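# Examples of field names accepted by the static FieldSet above
# (illustrative): plain names ("name", "os"), indexed items matched by the
# regexes ("disk.size/0", "nic.mac/1"), aggregates ("disk.sizes",
# "nic.count"), and per-parameter entries built from the "hv/%s" and
# "be/%s" expansions, e.g. "be/memory".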
5083 def CheckArguments(self):
5084 _CheckOutputFields(static=self._FIELDS_STATIC,
5085 dynamic=self._FIELDS_DYNAMIC,
5086 selected=self.op.output_fields)
5088 def ExpandNames(self):
5089 self.needed_locks = {}
5090 self.share_locks[locking.LEVEL_INSTANCE] = 1
5091 self.share_locks[locking.LEVEL_NODE] = 1
5093 if self.op.names:
5094   self.wanted = _GetWantedInstances(self, self.op.names)
5095 else:
5096   self.wanted = locking.ALL_SET
5098 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5099 self.do_locking = self.do_node_query and self.op.use_locking
5100 if self.do_locking:
5101   self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5102   self.needed_locks[locking.LEVEL_NODE] = []
5103   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5105 def DeclareLocks(self, level):
5106 if level == locking.LEVEL_NODE and self.do_locking:
5107 self._LockInstancesNodes()
5109 def Exec(self, feedback_fn):
5110 """Computes the list of nodes and their attributes.
5113 # pylint: disable-msg=R0912
5114 # way too many branches here
5115 all_info = self.cfg.GetAllInstancesInfo()
5116 if self.wanted == locking.ALL_SET:
5117   # caller didn't specify instance names, so ordering is not important
5118   if self.do_locking:
5119     instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5120   else:
5121     instance_names = all_info.keys()
5122   instance_names = utils.NiceSort(instance_names)
5123 else:
5124   # caller did specify names, so we must keep the ordering
5125   if self.do_locking:
5126     tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5127   else:
5128     tgt_set = all_info.keys()
5129   missing = set(self.wanted).difference(tgt_set)
5130   if missing:
5131     raise errors.OpExecError("Some instances were removed before"
5132                              " retrieving their data: %s" % missing)
5133   instance_names = self.wanted
5135 instance_list = [all_info[iname] for iname in instance_names]
5137 # begin data gathering
5139 nodes = frozenset([inst.primary_node for inst in instance_list])
5140 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5142 bad_nodes = []
5143 off_nodes = []
5144 if self.do_node_query:
5145   live_data = {}
5146   node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5147   for name in nodes:
5148     result = node_data[name]
5149     if result.offline:
5150       # offline nodes will be in both lists
5151       off_nodes.append(name)
5152     if result.fail_msg:
5153       bad_nodes.append(name)
5154     else:
5155       if result.payload:
5156         live_data.update(result.payload)
5157       # else no instance is alive
5158 else:
5159   live_data = dict([(name, {}) for name in instance_names])
5161 # end data gathering
5163 HVPREFIX = "hv/"
5164 BEPREFIX = "be/"
5165 output = []
5166 cluster = self.cfg.GetClusterInfo()
5167 for instance in instance_list:
5168   iout = []
5169   i_hv = cluster.FillHV(instance, skip_globals=True)
5170 i_be = cluster.FillBE(instance)
5171 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5172 for field in self.op.output_fields:
5173 st_match = self._FIELDS_STATIC.Matches(field)
5174 if field in self._SIMPLE_FIELDS:
5175 val = getattr(instance, field)
5176 elif field == "pnode":
5177 val = instance.primary_node
5178 elif field == "snodes":
5179 val = list(instance.secondary_nodes)
5180 elif field == "admin_state":
5181 val = instance.admin_up
5182 elif field == "oper_state":
5183 if instance.primary_node in bad_nodes:
5184   val = None
5185 else:
5186   val = bool(live_data.get(instance.name))
5187 elif field == "status":
5188 if instance.primary_node in off_nodes:
5189   val = "ERROR_nodeoffline"
5190 elif instance.primary_node in bad_nodes:
5191   val = "ERROR_nodedown"
5192 else:
5193   running = bool(live_data.get(instance.name))
5194   if running:
5195     if instance.admin_up:
5196       val = "running"
5197     else:
5198       val = "ERROR_up"
5199   else:
5200     if instance.admin_up:
5201       val = "ERROR_down"
5202     else:
5203       val = "ADMIN_down"
5204 elif field == "oper_ram":
5205 if instance.primary_node in bad_nodes:
5206   val = None
5207 elif instance.name in live_data:
5208   val = live_data[instance.name].get("memory", "?")
5209 else:
5210   val = "-"
5211 elif field == "oper_vcpus":
5212 if instance.primary_node in bad_nodes:
5213   val = None
5214 elif instance.name in live_data:
5215   val = live_data[instance.name].get("vcpus", "?")
5216 else:
5217   val = "-"
5218 elif field == "vcpus":
5219 val = i_be[constants.BE_VCPUS]
5220 elif field == "disk_template":
5221 val = instance.disk_template
5222 elif field == "ip":
5223 if instance.nics:
5224   val = instance.nics[0].ip
5225 else:
5226   val = None
5227 elif field == "nic_mode":
5228 if instance.nics:
5229   val = i_nicp[0][constants.NIC_MODE]
5230 else:
5231   val = None
5232 elif field == "nic_link":
5233 if instance.nics:
5234   val = i_nicp[0][constants.NIC_LINK]
5235 else:
5236   val = None
5237 elif field == "bridge":
5238 if (instance.nics and
5239     i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5240   val = i_nicp[0][constants.NIC_LINK]
5241 else:
5242   val = None
5243 elif field == "mac":
5244 if instance.nics:
5245   val = instance.nics[0].mac
5246 else:
5247   val = None
5248 elif field == "sda_size" or field == "sdb_size":
5249 idx = ord(field[2]) - ord('a')
5250 try:
5251   val = instance.FindDisk(idx).size
5252 except errors.OpPrereqError:
5253   val = None
5254 elif field == "disk_usage": # total disk usage per node
5255 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5256 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5257 elif field == "tags":
5258 val = list(instance.GetTags())
5259 elif field == "hvparams":
5260 val = i_hv
5261 elif (field.startswith(HVPREFIX) and
5262 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5263 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5264 val = i_hv.get(field[len(HVPREFIX):], None)
5265 elif field == "beparams":
5266 val = i_be
5267 elif (field.startswith(BEPREFIX) and
5268 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5269 val = i_be.get(field[len(BEPREFIX):], None)
5270 elif st_match and st_match.groups():
5271 # matches a variable list
5272 st_groups = st_match.groups()
5273 if st_groups and st_groups[0] == "disk":
5274 if st_groups[1] == "count":
5275 val = len(instance.disks)
5276 elif st_groups[1] == "sizes":
5277 val = [disk.size for disk in instance.disks]
5278 elif st_groups[1] == "size":
5279 try:
5280   val = instance.FindDisk(st_groups[2]).size
5281 except errors.OpPrereqError:
5282   val = None
5283 else:
5284 assert False, "Unhandled disk parameter"
5285 elif st_groups[0] == "nic":
5286 if st_groups[1] == "count":
5287 val = len(instance.nics)
5288 elif st_groups[1] == "macs":
5289 val = [nic.mac for nic in instance.nics]
5290 elif st_groups[1] == "ips":
5291 val = [nic.ip for nic in instance.nics]
5292 elif st_groups[1] == "modes":
5293 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5294 elif st_groups[1] == "links":
5295 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5296 elif st_groups[1] == "bridges":
5297 val = []
5298 for nicp in i_nicp:
5299   if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5300     val.append(nicp[constants.NIC_LINK])
5301   else:
5302     val.append(None)
5303 else:
5304 # index-based item
5305 nic_idx = int(st_groups[2])
5306 if nic_idx >= len(instance.nics):
5307   val = None
5308 else:
5309 if st_groups[1] == "mac":
5310 val = instance.nics[nic_idx].mac
5311 elif st_groups[1] == "ip":
5312 val = instance.nics[nic_idx].ip
5313 elif st_groups[1] == "mode":
5314 val = i_nicp[nic_idx][constants.NIC_MODE]
5315 elif st_groups[1] == "link":
5316 val = i_nicp[nic_idx][constants.NIC_LINK]
5317 elif st_groups[1] == "bridge":
5318 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5319 if nic_mode == constants.NIC_MODE_BRIDGED:
5320 val = i_nicp[nic_idx][constants.NIC_LINK]
5321 else:
5322   val = None
5323 else:
5324 assert False, "Unhandled NIC parameter"
5325 else:
5326 assert False, ("Declared but unhandled variable parameter '%s'" %
5327                field)
5328 else:
5329 assert False, "Declared but unhandled parameter '%s'" % field
5331 iout.append(val)
5332 output.append(iout)
5334 return output
5336 class LUFailoverInstance(LogicalUnit):
5337 """Failover an instance.
5340 HPATH = "instance-failover"
5341 HTYPE = constants.HTYPE_INSTANCE
5342 _OP_PARAMS = [
5343   _PInstanceName,
5344   ("ignore_consistency", False, _TBool),
5345   _PShutdownTimeout,
5346   ]
5347 REQ_BGL = False
5349 def ExpandNames(self):
5350 self._ExpandAndLockInstance()
5351 self.needed_locks[locking.LEVEL_NODE] = []
5352 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5354 def DeclareLocks(self, level):
5355 if level == locking.LEVEL_NODE:
5356 self._LockInstancesNodes()
5358 def BuildHooksEnv(self):
5361 This runs on master, primary and secondary nodes of the instance.
5364 instance = self.instance
5365 source_node = instance.primary_node
5366 target_node = instance.secondary_nodes[0]
5367 env = {
5368   "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5369   "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5370   "OLD_PRIMARY": source_node,
5371   "OLD_SECONDARY": target_node,
5372   "NEW_PRIMARY": target_node,
5373   "NEW_SECONDARY": source_node,
5374   }
5375 env.update(_BuildInstanceHookEnvByObject(self, instance))
5376 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5377 nl_post = list(nl)
5378 nl_post.append(source_node)
5379 return env, nl, nl_post
5381 def CheckPrereq(self):
5382 """Check prerequisites.
5384 This checks that the instance is in the cluster.
5387 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5388 assert self.instance is not None, \
5389 "Cannot retrieve locked instance %s" % self.op.instance_name
5391 bep = self.cfg.GetClusterInfo().FillBE(instance)
5392 if instance.disk_template not in constants.DTS_NET_MIRROR:
5393 raise errors.OpPrereqError("Instance's disk layout is not"
5394                            " network mirrored, cannot failover.",
5395                            errors.ECODE_STATE)
5397 secondary_nodes = instance.secondary_nodes
5398 if not secondary_nodes:
5399 raise errors.ProgrammerError("no secondary node but using "
5400 "a mirrored disk template")
5402 target_node = secondary_nodes[0]
5403 _CheckNodeOnline(self, target_node)
5404 _CheckNodeNotDrained(self, target_node)
5405 if instance.admin_up:
5406 # check memory requirements on the secondary node
5407 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5408 instance.name, bep[constants.BE_MEMORY],
5409 instance.hypervisor)
5411 self.LogInfo("Not checking memory on the secondary node as"
5412 " instance will not be started")
5414 # check bridge existence
5415 _CheckInstanceBridgesExist(self, instance, node=target_node)
5417 def Exec(self, feedback_fn):
5418 """Failover an instance.
5420 The failover is done by shutting it down on its present node and
5421 starting it on the secondary.
5424 instance = self.instance
5426 source_node = instance.primary_node
5427 target_node = instance.secondary_nodes[0]
5429 if instance.admin_up:
5430 feedback_fn("* checking disk consistency between source and target")
5431 for dev in instance.disks:
5432 # for drbd, these are drbd over lvm
5433 if not _CheckDiskConsistency(self, dev, target_node, False):
5434 if not self.op.ignore_consistency:
5435 raise errors.OpExecError("Disk %s is degraded on target node,"
5436 " aborting failover." % dev.iv_name)
5438 feedback_fn("* not checking disk consistency as instance is not running")
5440 feedback_fn("* shutting down instance on source node")
5441 logging.info("Shutting down instance %s on node %s",
5442 instance.name, source_node)
5444 result = self.rpc.call_instance_shutdown(source_node, instance,
5445 self.op.shutdown_timeout)
5446 msg = result.fail_msg
5447 if msg:
5448   if self.op.ignore_consistency:
5449     self.proc.LogWarning("Could not shutdown instance %s on node %s."
5450                          " Proceeding anyway. Please make sure node"
5451                          " %s is down. Error details: %s",
5452                          instance.name, source_node, source_node, msg)
5453   else:
5454     raise errors.OpExecError("Could not shutdown instance %s on"
5455                              " node %s: %s" %
5456                              (instance.name, source_node, msg))
5458 feedback_fn("* deactivating the instance's disks on source node")
5459 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5460 raise errors.OpExecError("Can't shut down the instance's disks.")
5462 instance.primary_node = target_node
5463 # distribute new instance config to the other nodes
5464 self.cfg.Update(instance, feedback_fn)
5466 # Only start the instance if it's marked as up
5467 if instance.admin_up:
5468 feedback_fn("* activating the instance's disks on target node")
5469 logging.info("Starting instance %s on node %s",
5470 instance.name, target_node)
5472 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5473                                      ignore_secondaries=True)
5474 if not disks_ok:
5475   _ShutdownInstanceDisks(self, instance)
5476   raise errors.OpExecError("Can't activate the instance's disks")
5478 feedback_fn("* starting the instance on the target node")
5479 result = self.rpc.call_instance_start(target_node, instance, None, None)
5480 msg = result.fail_msg
5481 if msg:
5482   _ShutdownInstanceDisks(self, instance)
5483 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5484 (instance.name, target_node, msg))
5487 class LUMigrateInstance(LogicalUnit):
5488 """Migrate an instance.
5490 This is migration without shutting down, compared to the failover,
5491 which is done with shutdown.
5494 HPATH = "instance-migrate"
5495 HTYPE = constants.HTYPE_INSTANCE
5496 _OP_PARAMS = [
5497   _PInstanceName,
5498   _PMigrationMode,
5499   ("cleanup", False, _TBool),
5500   ]
5501 REQ_BGL = False
5504 def ExpandNames(self):
5505 self._ExpandAndLockInstance()
5507 self.needed_locks[locking.LEVEL_NODE] = []
5508 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5510 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5511                                    self.op.cleanup)
5512 self.tasklets = [self._migrater]
5514 def DeclareLocks(self, level):
5515 if level == locking.LEVEL_NODE:
5516 self._LockInstancesNodes()
5518 def BuildHooksEnv(self):
5521 This runs on master, primary and secondary nodes of the instance.
5524 instance = self._migrater.instance
5525 source_node = instance.primary_node
5526 target_node = instance.secondary_nodes[0]
5527 env = _BuildInstanceHookEnvByObject(self, instance)
5528 env["MIGRATE_LIVE"] = self._migrater.live
5529 env["MIGRATE_CLEANUP"] = self.op.cleanup
5530 env.update({
5531   "OLD_PRIMARY": source_node,
5532   "OLD_SECONDARY": target_node,
5533   "NEW_PRIMARY": target_node,
5534   "NEW_SECONDARY": source_node,
5535   })
5536 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5537 nl_post = list(nl)
5538 nl_post.append(source_node)
5539 return env, nl, nl_post
5542 class LUMoveInstance(LogicalUnit):
5543 """Move an instance by data-copying.
5546 HPATH = "instance-move"
5547 HTYPE = constants.HTYPE_INSTANCE
5548 _OP_PARAMS = [
5549   _PInstanceName,
5550   ("target_node", _NoDefault, _TNonEmptyString),
5551   _PShutdownTimeout,
5552   ]
5553 REQ_BGL = False
5555 def ExpandNames(self):
5556 self._ExpandAndLockInstance()
5557 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5558 self.op.target_node = target_node
5559 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5560 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5562 def DeclareLocks(self, level):
5563 if level == locking.LEVEL_NODE:
5564 self._LockInstancesNodes(primary_only=True)
5566 def BuildHooksEnv(self):
5569 This runs on master, primary and secondary nodes of the instance.
5571 """
5572 env = {
5573   "TARGET_NODE": self.op.target_node,
5574   "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5575   }
5576 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5577 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5578                                    self.op.target_node]
5579 return env, nl, nl
5581 def CheckPrereq(self):
5582 """Check prerequisites.
5584 This checks that the instance is in the cluster.
5587 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5588 assert self.instance is not None, \
5589 "Cannot retrieve locked instance %s" % self.op.instance_name
5591 node = self.cfg.GetNodeInfo(self.op.target_node)
5592 assert node is not None, \
5593 "Cannot retrieve locked node %s" % self.op.target_node
5595 self.target_node = target_node = node.name
5597 if target_node == instance.primary_node:
5598 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5599 (instance.name, target_node),
5602 bep = self.cfg.GetClusterInfo().FillBE(instance)
5604 for idx, dsk in enumerate(instance.disks):
5605 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5606 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5607 " cannot copy" % idx, errors.ECODE_STATE)
5609 _CheckNodeOnline(self, target_node)
5610 _CheckNodeNotDrained(self, target_node)
5612 if instance.admin_up:
5613 # check memory requirements on the secondary node
5614 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5615 instance.name, bep[constants.BE_MEMORY],
5616 instance.hypervisor)
5618 self.LogInfo("Not checking memory on the secondary node as"
5619 " instance will not be started")
5621 # check bridge existence
5622 _CheckInstanceBridgesExist(self, instance, node=target_node)
5624 def Exec(self, feedback_fn):
5625 """Move an instance.
5627 The move is done by shutting it down on its present node, copying
5628 the data over (slow) and starting it on the new node.
5631 instance = self.instance
5633 source_node = instance.primary_node
5634 target_node = self.target_node
5636 self.LogInfo("Shutting down instance %s on source node %s",
5637 instance.name, source_node)
5639 result = self.rpc.call_instance_shutdown(source_node, instance,
5640 self.op.shutdown_timeout)
5641 msg = result.fail_msg
5642 if msg:
5643   if self.op.ignore_consistency:
5644     self.proc.LogWarning("Could not shutdown instance %s on node %s."
5645                          " Proceeding anyway. Please make sure node"
5646                          " %s is down. Error details: %s",
5647                          instance.name, source_node, source_node, msg)
5648   else:
5649     raise errors.OpExecError("Could not shutdown instance %s on"
5650                              " node %s: %s" %
5651                              (instance.name, source_node, msg))
5653 # create the target disks
5654 try:
5655   _CreateDisks(self, instance, target_node=target_node)
5656 except errors.OpExecError:
5657   self.LogWarning("Device creation failed, reverting...")
5658   try:
5659     _RemoveDisks(self, instance, target_node=target_node)
5660   finally:
5661     self.cfg.ReleaseDRBDMinors(instance.name)
5662     raise
5664 cluster_name = self.cfg.GetClusterInfo().cluster_name
5666 errs = []
5667 # activate, get path, copy the data over
5668 for idx, disk in enumerate(instance.disks):
5669   self.LogInfo("Copying data for disk %d", idx)
5670   result = self.rpc.call_blockdev_assemble(target_node, disk,
5671                                            instance.name, True)
5672   if result.fail_msg:
5673     self.LogWarning("Can't assemble newly created disk %d: %s",
5674                     idx, result.fail_msg)
5675     errs.append(result.fail_msg)
5676     break
5677   dev_path = result.payload
5678   result = self.rpc.call_blockdev_export(source_node, disk,
5679                                          target_node, dev_path,
5680                                          cluster_name)
5681   if result.fail_msg:
5682     self.LogWarning("Can't copy data over for disk %d: %s",
5683                     idx, result.fail_msg)
5684     errs.append(result.fail_msg)
5685     break
5687 if errs:
5688   self.LogWarning("Some disks failed to copy, aborting")
5689   try:
5690     _RemoveDisks(self, instance, target_node=target_node)
5691   finally:
5692     self.cfg.ReleaseDRBDMinors(instance.name)
5693     raise errors.OpExecError("Errors during disk copy: %s" %
5694                              (",".join(errs),))
5696 instance.primary_node = target_node
5697 self.cfg.Update(instance, feedback_fn)
5699 self.LogInfo("Removing the disks on the original node")
5700 _RemoveDisks(self, instance, target_node=source_node)
5702 # Only start the instance if it's marked as up
5703 if instance.admin_up:
5704 self.LogInfo("Starting instance %s on node %s",
5705 instance.name, target_node)
5707 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5708                                      ignore_secondaries=True)
5709 if not disks_ok:
5710   _ShutdownInstanceDisks(self, instance)
5711   raise errors.OpExecError("Can't activate the instance's disks")
5713 result = self.rpc.call_instance_start(target_node, instance, None, None)
5714 msg = result.fail_msg
5715 if msg:
5716   _ShutdownInstanceDisks(self, instance)
5717 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5718 (instance.name, target_node, msg))
5721 class LUMigrateNode(LogicalUnit):
5722 """Migrate all instances from a node.
5725 HPATH = "node-migrate"
5726 HTYPE = constants.HTYPE_NODE
5733 def ExpandNames(self):
5734 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5736 self.needed_locks = {
5737 locking.LEVEL_NODE: [self.op.node_name],
5738 }
5740 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5742 # Create tasklets for migrating instances for all instances on this node
5743 names = []
5744 tasklets = []
5746 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5747 logging.debug("Migrating instance %s", inst.name)
5748 names.append(inst.name)
5750 tasklets.append(TLMigrateInstance(self, inst.name, False))
5752 self.tasklets = tasklets
5754 # Declare instance locks
5755 self.needed_locks[locking.LEVEL_INSTANCE] = names
5757 def DeclareLocks(self, level):
5758 if level == locking.LEVEL_NODE:
5759 self._LockInstancesNodes()
5761 def BuildHooksEnv(self):
5764 This runs on the master, the primary and all the secondaries.
5768 "NODE_NAME": self.op.node_name,
5771 nl = [self.cfg.GetMasterNode()]
5773 return (env, nl, nl)
5776 class TLMigrateInstance(Tasklet):
5777 """Tasklet class for instance migration.
5780 @ivar live: whether the migration will be done live or non-live;
5781 this variable is initialized only after CheckPrereq has run
5784 def __init__(self, lu, instance_name, cleanup):
5785 """Initializes this class.
5788 Tasklet.__init__(self, lu)
5791 self.instance_name = instance_name
5792 self.cleanup = cleanup
5793 self.live = False # will be overridden later
5795 def CheckPrereq(self):
5796 """Check prerequisites.
5798 This checks that the instance is in the cluster.
5801 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5802 instance = self.cfg.GetInstanceInfo(instance_name)
5803 assert instance is not None
5805 if instance.disk_template != constants.DT_DRBD8:
5806 raise errors.OpPrereqError("Instance's disk layout is not"
5807 " drbd8, cannot migrate.", errors.ECODE_STATE)
5809 secondary_nodes = instance.secondary_nodes
5810 if not secondary_nodes:
5811 raise errors.ConfigurationError("No secondary node but using"
5812 " drbd8 disk template")
5814 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5816 target_node = secondary_nodes[0]
5817 # check memory requirements on the secondary node
5818 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5819 instance.name, i_be[constants.BE_MEMORY],
5820 instance.hypervisor)
5822 # check bridge existence
5823 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5825 if not self.cleanup:
5826 _CheckNodeNotDrained(self.lu, target_node)
5827 result = self.rpc.call_instance_migratable(instance.primary_node,
5828                                            instance)
5829 result.Raise("Can't migrate, please use failover",
5830 prereq=True, ecode=errors.ECODE_STATE)
5832 self.instance = instance
5834 if self.lu.op.mode is None:
5835 # read the default value from the hypervisor
5836 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5837 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5839 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
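# Worked example of the mode resolution above (illustrative values): with
# op.mode unset, the effective hypervisor parameters decide; e.g. if
# i_hv[constants.HV_MIGRATION_MODE] == constants.HT_MIGRATION_LIVE, then
# self.live is True and the migration is attempted without shutting the
# instance down.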
5841 def _WaitUntilSync(self):
5842 """Poll with custom rpc for disk sync.
5844 This uses our own step-based rpc call.
5847 self.feedback_fn("* wait until resync is done")
5848 all_done = False
5849 while not all_done:
5850   all_done = True
5851   result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5852                                         self.nodes_ip,
5853                                         self.instance.disks)
5854   min_percent = 100
5855   for node, nres in result.items():
5856     nres.Raise("Cannot resync disks on node %s" % node)
5857     node_done, node_percent = nres.payload
5858     all_done = all_done and node_done
5859     if node_percent is not None:
5860       min_percent = min(min_percent, node_percent)
5861   if not all_done:
5862     if min_percent < 100:
5863       self.feedback_fn(" - progress: %.1f%%" % min_percent)
5864     time.sleep(2)
5866 def _EnsureSecondary(self, node):
5867 """Demote a node to secondary.
5870 self.feedback_fn("* switching node %s to secondary mode" % node)
5872 for dev in self.instance.disks:
5873 self.cfg.SetDiskID(dev, node)
5875 result = self.rpc.call_blockdev_close(node, self.instance.name,
5876 self.instance.disks)
5877 result.Raise("Cannot change disk to secondary on node %s" % node)
5879 def _GoStandalone(self):
5880 """Disconnect from the network.
5883 self.feedback_fn("* changing into standalone mode")
5884 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5885 self.instance.disks)
5886 for node, nres in result.items():
5887 nres.Raise("Cannot disconnect disks node %s" % node)
5889 def _GoReconnect(self, multimaster):
5890 """Reconnect to the network.
5896 msg = "single-master"
5897 self.feedback_fn("* changing disks into %s mode" % msg)
5898 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5899 self.instance.disks,
5900 self.instance.name, multimaster)
5901 for node, nres in result.items():
5902 nres.Raise("Cannot change disks config on node %s" % node)
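  # The three helpers above are always used as a fixed sequence when the
  # DRBD network configuration has to be rebuilt; a minimal sketch of the
  # canonical ordering (only the multimaster flag varies between callers):
  #
  #   self._EnsureSecondary(node)   # close devices on the demoted node
  #   self._GoStandalone()          # drop the current network config
  #   self._GoReconnect(False)      # reattach in single-master mode
  #   self._WaitUntilSync()         # wait for the resync to finish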
5904 def _ExecCleanup(self):
5905 """Try to cleanup after a failed migration.
5907 The cleanup is done by:
5908 - check that the instance is running only on one node
5909 (and update the config if needed)
5910 - change disks on its secondary node to secondary
5911 - wait until disks are fully synchronized
5912 - disconnect from the network
5913 - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
5917 instance = self.instance
5918 target_node = self.target_node
5919 source_node = self.source_node
5921 # check running on only one node
5922 self.feedback_fn("* checking where the instance actually runs"
5923 " (if this hangs, the hypervisor might be in"
5925 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5926 for node, result in ins_l.items():
5927 result.Raise("Can't contact node %s" % node)
5929 runningon_source = instance.name in ins_l[source_node].payload
5930 runningon_target = instance.name in ins_l[target_node].payload
5932 if runningon_source and runningon_target:
5933 raise errors.OpExecError("Instance seems to be running on two nodes,"
5934 " or the hypervisor is confused. You will have"
5935 " to ensure manually that it runs only on one"
5936 " and restart this operation.")
5938 if not (runningon_source or runningon_target):
5939 raise errors.OpExecError("Instance does not seem to be running at all."
5940 " In this case, it's safer to repair by"
5941 " running 'gnt-instance stop' to ensure disk"
5942 " shutdown, and then restarting it.")
5944 if runningon_target:
5945 # the migration has actually succeeded, we need to update the config
5946 self.feedback_fn("* instance running on secondary node (%s),"
5947 " updating config" % target_node)
5948 instance.primary_node = target_node
5949 self.cfg.Update(instance, self.feedback_fn)
5950 demoted_node = source_node
5952 self.feedback_fn("* instance confirmed to be running on its"
5953 " primary node (%s)" % source_node)
5954 demoted_node = target_node
    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
5964 self._GoReconnect(False)
5965 self._WaitUntilSync()
5967 self.feedback_fn("* done")
5969 def _RevertDiskStatus(self):
5970 """Try to revert the disk status after a failed migration.
5973 target_node = self.target_node
5975 self._EnsureSecondary(target_node)
5976 self._GoStandalone()
5977 self._GoReconnect(False)
5978 self._WaitUntilSync()
5979 except errors.OpExecError, err:
5980 self.lu.LogWarning("Migration failed and I can't reconnect the"
5981 " drives: error '%s'\n"
5982 "Please look and recover the instance status" %
5985 def _AbortMigration(self):
5986 """Call the hypervisor code to abort a started migration.
5989 instance = self.instance
5990 target_node = self.target_node
5991 migration_info = self.migration_info
5993 abort_result = self.rpc.call_finalize_migration(target_node,
5997 abort_msg = abort_result.fail_msg
5999 logging.error("Aborting migration failed on target node %s: %s",
6000 target_node, abort_msg)
6001 # Don't raise an exception here, as we stil have to try to revert the
6002 # disk status, even if this step failed.
6004 def _ExecMigration(self):
6005 """Migrate an instance.
6007 The migrate is done by:
6008 - change the disks into dual-master mode
6009 - wait until disks are fully synchronized again
6010 - migrate the instance
6011 - change disks on the new secondary node (the old primary) to secondary
6012 - wait until disks are fully synchronized
      - change disks into single-master mode

    """
6016 instance = self.instance
6017 target_node = self.target_node
6018 source_node = self.source_node
6020 self.feedback_fn("* checking disk consistency between source and target")
6021 for dev in instance.disks:
6022 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6023 raise errors.OpExecError("Disk %s is degraded or not fully"
6024 " synchronized on target node,"
6025 " aborting migrate." % dev.iv_name)
6027 # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload
6038 # Then switch the disks to master/master mode
6039 self._EnsureSecondary(target_node)
6040 self._GoStandalone()
6041 self._GoReconnect(True)
6042 self._WaitUntilSync()
6044 self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])
    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))
6060 self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
6076 instance.primary_node = target_node
6077 # distribute new instance config to the other nodes
6078 self.cfg.Update(instance, self.feedback_fn)
    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)
6091 self._EnsureSecondary(source_node)
6092 self._WaitUntilSync()
6093 self._GoStandalone()
6094 self._GoReconnect(False)
6095 self._WaitUntilSync()
6097 self.feedback_fn("* done")
6099 def Exec(self, feedback_fn):
6100 """Perform the migration.
6103 feedback_fn("Migrating instance %s" % self.instance.name)
6105 self.feedback_fn = feedback_fn
6107 self.source_node = self.instance.primary_node
6108 self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
6121 def _CreateBlockDev(lu, node, instance, device, force_create,
6123 """Create a tree of block devices on a given node.
6125 If this device type has to be created on secondaries, create it and
6128 If not, just recurse to children keeping the same 'force' value.
6130 @param lu: the lu on whose behalf we execute
6131 @param node: the node on which to create the device
6132 @type instance: L{objects.Instance}
6133 @param instance: the instance which owns the device
6134 @type device: L{objects.Disk}
6135 @param device: the device to create
6136 @type force_create: boolean
6137 @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
6139 CreateOnSecondary() attribute
6140 @param info: the extra 'metadata' we should attach to the device
6141 (this will be represented as a LVM tag)
6142 @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
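# Illustrative sketch only: for a DRBD8 disk the recursion above first
# creates the two LV children (data and metadata) on a node and then the
# DRBD device on top of them; a typical caller does something like
#
#   for node in all_nodes:
#     f_create = (node == instance.primary_node)
#     _CreateBlockDev(lu, node, instance, disk, f_create, info, f_create)
#
# which mirrors the loop in _CreateDisks() further below.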
6163 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6164 """Create a single block device on a given node.
  This will not recurse over children of the device, so they must be
  created in advance.
6169 @param lu: the lu on whose behalf we execute
6170 @param node: the node on which to create the device
6171 @type instance: L{objects.Instance}
6172 @param instance: the instance which owns the device
6173 @type device: L{objects.Disk}
6174 @param device: the device to create
6175 @param info: the extra 'metadata' we should attach to the device
6176 (this will be represented as a LVM tag)
6177 @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
6184 lu.cfg.SetDiskID(device, node)
6185 result = lu.rpc.call_blockdev_create(node, device, device.size,
6186 instance.name, force_open, info)
6187 result.Raise("Can't create block device %s on"
6188 " node %s for instance %s" % (device, node, instance.name))
6189 if device.physical_id is None:
6190 device.physical_id = result.payload
6193 def _GenerateUniqueNames(lu, exts):
6194 """Generate a suitable LV name.
  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
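# Example (illustrative; the UUID-style prefixes are made up):
#
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
#   # -> ["2f5d0dcb-....disk0_data", "8c3a1e6f-....disk0_meta"]
#
# Each extension gets its own unique id, so the two names do not share a
# common prefix.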
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
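# Illustrative use (node names, LV names and minors here are hypothetical;
# in _GenerateDiskTemplate() below they come from the config via
# AllocateDRBDMinor and _GenerateUniqueNames):
#
#   disk = _GenerateDRBD8Branch(lu, "node1", "node2", 10240,
#                               ["x_data", "x_meta"], "disk/0", 0, 0)
#
# The result is one LD_DRBD8 disk with two LD_LV children (data plus a
# fixed 128 MiB metadata volume).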
6227 def _GenerateDiskTemplate(lu, template_name,
6228 instance_name, primary_node,
6229 secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
6235 #TODO: compute space requirements
  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
6243 if len(secondary_nodes) != 0:
6244 raise errors.ProgrammerError("Wrong template configuration")
6246 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6247 for i in range(disk_count)])
6248 for idx, disk in enumerate(disk_info):
6249 disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
6255 elif template_name == constants.DT_DRBD8:
6256 if len(secondary_nodes) != 1:
6257 raise errors.ProgrammerError("Wrong template configuration")
6258 remote_node = secondary_nodes[0]
6259 minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
6267 for idx, disk in enumerate(disk_info):
6268 disk_index = idx + base_index
6269 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6270 disk["size"], names[idx*2:idx*2+2],
6271 "disk/%d" % disk_index,
6272 minors[idx*2], minors[idx*2+1])
6273 disk_dev.mode = disk["mode"]
6274 disks.append(disk_dev)
6275 elif template_name == constants.DT_FILE:
6276 if len(secondary_nodes) != 0:
6277 raise errors.ProgrammerError("Wrong template configuration")
6279 _RequireFileStorage()
6281 for idx, disk in enumerate(disk_info):
6282 disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks
6295 def _GetInstanceInfoText(instance):
6296 """Compute that text that should be added to the disk's metadata.
6299 return "originstname+%s" % instance.name
6302 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6303 """Create all disks for an instance.
6305 This abstracts away some work from AddInstance.
6307 @type lu: L{LogicalUnit}
6308 @param lu: the logical unit on whose behalf we execute
6309 @type instance: L{objects.Instance}
6310 @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
6313 @type target_node: string
6314 @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]
6327 if instance.disk_template == constants.DT_FILE:
6328 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6329 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6331 result.Raise("Failed to create directory '%s' on"
6332 " node %s" % (file_storage_dir, pnode))
6334 # Note: this needs to be kept in sync with adding of disks in
6335 # LUSetInstanceParams
6336 for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
6339 logging.info("Creating volume %s for instance %s",
6340 device.iv_name, instance.name)
6342 for node in all_nodes:
6343 f_create = node == pnode
6344 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6347 def _RemoveDisks(lu, instance, target_node=None):
6348 """Remove all disks for an instance.
6350 This abstracts away some work from `AddInstance()` and
6351 `RemoveInstance()`. Note that in case some of the devices couldn't
6352 be removed, the removal will continue with the other ones (compare
6353 with `_CreateDisks()`).
6355 @type lu: L{LogicalUnit}
6356 @param lu: the logical unit on whose behalf we execute
6357 @type instance: L{objects.Instance}
6358 @param instance: the instance whose disks we should remove
6359 @type target_node: string
6360 @param target_node: used to override the node on which to remove the disks
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
6396 def _ComputeDiskSize(disk_template, disks):
6397 """Compute disk size requirements in the volume group
6400 # Required free disk space as a function of disk and swap space
6402 constants.DT_DISKLESS: None,
6403 constants.DT_PLAIN: sum(d["size"] for d in disks),
6404 # 128 MB are added for drbd metadata for each disk
6405 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6406 constants.DT_FILE: None,
6409 if disk_template not in req_size_dict:
6410 raise errors.ProgrammerError("Disk template '%s' size requirement"
6411 " is unknown" % disk_template)
6413 return req_size_dict[disk_template]
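# Worked example (sizes in MiB): for two disks of 10240 and 512,
# DT_PLAIN needs 10240 + 512 = 10752 in the volume group, while
# DT_DRBD8 needs (10240 + 128) + (512 + 128) = 11008 because each disk
# carries a 128 MiB DRBD metadata volume; DT_FILE and DT_DISKLESS
# consume no volume group space at all (None).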
6416 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6417 """Hypervisor parameter validation.
6419 This function abstract the hypervisor parameter validation to be
6420 used in both instance create and instance modify.
6422 @type lu: L{LogicalUnit}
6423 @param lu: the logical unit for which we check
6424 @type nodenames: list
6425 @param nodenames: the list of nodes on which we should check
6426 @type hvname: string
6427 @param hvname: the name of the hypervisor we should use
6428 @type hvparams: dict
6429 @param hvparams: the parameters which we need to check
6430 @raise errors.OpPrereqError: if the parameters are not valid
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6443 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6444 """OS parameters validation.
6446 @type lu: L{LogicalUnit}
6447 @param lu: the logical unit for which we check
6448 @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
6451 @type nodenames: list
6452 @param nodenames: the list of nodes on which we should check
6453 @type osname: string
  @param osname: the name of the OS we should use
6455 @type osparams: dict
6456 @param osparams: the parameters which we need to check
6457 @raise errors.OpPrereqError: if the parameters are not valid
6460 result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
6463 for node, nres in result.items():
6464 # we don't check for offline cases since this should be run only
6465 # against the master node and/or an instance's nodes
6466 nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
6472 class LUCreateInstance(LogicalUnit):
6473 """Create an instance.
6476 HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6481 ("start", True, _TBool),
6482 ("wait_for_sync", True, _TBool),
6483 ("ip_check", True, _TBool),
6484 ("name_check", True, _TBool),
6485 ("disks", _NoDefault, _TListOf(_TDict)),
6486 ("nics", _NoDefault, _TListOf(_TDict)),
6487 ("hvparams", _EmptyDict, _TDict),
6488 ("beparams", _EmptyDict, _TDict),
6489 ("osparams", _EmptyDict, _TDict),
6490 ("no_install", None, _TMaybeBool),
6491 ("os_type", None, _TMaybeString),
6492 ("force_variant", False, _TBool),
6493 ("source_handshake", None, _TOr(_TList, _TNone)),
6494 ("source_x509_ca", None, _TOr(_TList, _TNone)),
6495 ("source_instance_name", None, _TMaybeString),
6496 ("src_node", None, _TMaybeString),
6497 ("src_path", None, _TMaybeString),
6498 ("pnode", None, _TMaybeString),
6499 ("snode", None, _TMaybeString),
6500 ("iallocator", None, _TMaybeString),
6501 ("hypervisor", None, _TMaybeString),
6502 ("disk_template", _NoDefault, _CheckDiskTemplate),
6503 ("identify_defaults", False, _TBool),
6504 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6505 ("file_storage_dir", None, _TMaybeString),
6506 ("dry_run", False, _TBool),
  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
6516 if self.op.no_install and self.op.start:
6517 self.LogInfo("No-installation mode selected, disabling startup")
6518 self.op.start = False
6519 # validate/normalize the instance name
6520 self.op.instance_name = \
6521 netutils.HostInfo.NormalizeName(self.op.instance_name)
6523 if self.op.ip_check and not self.op.name_check:
6524 # TODO: make the ip check more flexible and not depend on the name check
6525 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6528 # check nics' parameter names
6529 for nic in self.op.nics:
6530 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6532 # check disks. parameter names and consistent adopt/no-adopt strategy
6533 has_adopt = has_no_adopt = False
6534 for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)

    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6545 raise errors.OpPrereqError("Disk adoption is not supported for the"
6546 " '%s' disk template" %
6547 self.op.disk_template,
6549 if self.op.iallocator is not None:
6550 raise errors.OpPrereqError("Disk adoption not allowed with an"
6551 " iallocator script", errors.ECODE_INVAL)
6552 if self.op.mode == constants.INSTANCE_IMPORT:
6553 raise errors.OpPrereqError("Disk adoption not allowed for"
6554 " instance import", errors.ECODE_INVAL)
6556 self.adopt_disks = has_adopt
6558 # instance name verification
6559 if self.op.name_check:
6560 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6561 self.op.instance_name = self.hostname1.name
6562 # used in CheckPrereq for ip ping check
6563 self.check_ip = self.hostname1.ip
6564 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6565 raise errors.OpPrereqError("Remote imports require names to be checked" %
6568 self.check_ip = None
6570 # file storage checks
6571 if (self.op.file_driver and
6572 not self.op.file_driver in constants.FILE_DRIVER):
6573 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6574 self.op.file_driver, errors.ECODE_INVAL)
6576 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6577 raise errors.OpPrereqError("File storage directory path not absolute",
6580 ### Node/iallocator related checks
6581 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6583 self._cds = _GetClusterDomainSecret()
6585 if self.op.mode == constants.INSTANCE_IMPORT:
6586 # On import force_variant must be True, because if we forced it at
6587 # initial install, our only chance when importing it back is that it
6589 self.op.force_variant = True
6591 if self.op.no_install:
6592 self.LogInfo("No-installation mode has no effect during import")
6594 elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)
6602 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6603 # Check handshake to ensure both clusters have the same domain secret
6604 src_handshake = self.op.source_handshake
6605 if not src_handshake:
6606 raise errors.OpPrereqError("Missing source handshake",
6609 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6612 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6615 # Load and check source CA
6616 self.source_x509_ca_pem = self.op.source_x509_ca
6617 if not self.source_x509_ca_pem:
6618 raise errors.OpPrereqError("Missing source X509 CA",
6622 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6624 except OpenSSL.crypto.Error, err:
6625 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6626 (err, ), errors.ECODE_INVAL)
6628 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6629 if errcode is not None:
6630 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6633 self.source_x509_ca = cert
6635 src_instance_name = self.op.source_instance_name
6636 if not src_instance_name:
6637 raise errors.OpPrereqError("Missing source instance name",
6640 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6641 self.source_instance_name = netutils.GetHostInfo(norm_name).name
6644 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6645 self.op.mode, errors.ECODE_INVAL)
6647 def ExpandNames(self):
6648 """ExpandNames for CreateInstance.
6650 Figure out the right locks for instance creation.
6653 self.needed_locks = {}
6655 instance_name = self.op.instance_name
6656 # this is just a preventive check, but someone might still add this
6657 # instance in the meantime, and creation will fail at lock-add time
6658 if instance_name in self.cfg.GetInstanceList():
6659 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6660 instance_name, errors.ECODE_EXISTS)
6662 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6664 if self.op.iallocator:
6665 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6667 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6668 nodelist = [self.op.pnode]
6669 if self.op.snode is not None:
6670 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6671 nodelist.append(self.op.snode)
6672 self.needed_locks[locking.LEVEL_NODE] = nodelist
6674 # in case of import lock the source node too
6675 if self.op.mode == constants.INSTANCE_IMPORT:
6676 src_node = self.op.src_node
6677 src_path = self.op.src_path
6679 if src_path is None:
6680 self.op.src_path = src_path = self.op.instance_name
6682 if src_node is None:
6683 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6684 self.op.src_node = None
6685 if os.path.isabs(src_path):
6686 raise errors.OpPrereqError("Importing an instance from an absolute"
6687 " path requires a source node option.",
6690 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6691 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6692 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6693 if not os.path.isabs(src_path):
6694 self.op.src_path = src_path = \
6695 utils.PathJoin(constants.EXPORT_DIR, src_path)
6697 def _RunAllocator(self):
6698 """Run the allocator based on input opcode.
6701 nics = [n.ToDict() for n in self.nics]
6702 ial = IAllocator(self.cfg, self.rpc,
6703 mode=constants.IALLOCATOR_MODE_ALLOC,
6704 name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
6722 if len(ial.result) != ial.required_nodes:
6723 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6724 " of nodes (%s), required %s" %
6725 (self.op.iallocator, len(ial.result),
6726 ial.required_nodes), errors.ECODE_FAULT)
6727 self.op.pnode = ial.result[0]
6728 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6729 self.op.instance_name, self.op.iallocator,
6730 utils.CommaJoin(ial.result))
6731 if ial.required_nodes == 2:
6732 self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
6743 if self.op.mode == constants.INSTANCE_IMPORT:
6744 env["SRC_NODE"] = self.op.src_node
6745 env["SRC_PATH"] = self.op.src_path
6746 env["SRC_IMAGES"] = self.src_images
6748 env.update(_BuildInstanceHookEnv(
6749 name=self.op.instance_name,
6750 primary_node=self.op.pnode,
6751 secondary_nodes=self.secondaries,
6752 status=self.op.start,
6753 os_type=self.op.os_type,
6754 memory=self.be_full[constants.BE_MEMORY],
6755 vcpus=self.be_full[constants.BE_VCPUS],
6756 nics=_NICListToTuple(self, self.nics),
6757 disk_template=self.op.disk_template,
6758 disks=[(d["size"], d["mode"]) for d in self.disks],
6761 hypervisor_name=self.op.hypervisor,
6764 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6768 def _ReadExportInfo(self):
6769 """Reads the export information from disk.
6771 It will override the opcode source node and path with the actual
6772 information, if these two were not specified before.
6774 @return: the export information
6777 assert self.op.mode == constants.INSTANCE_IMPORT
6779 src_node = self.op.src_node
6780 src_path = self.op.src_path
6782 if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)
6799 _CheckNodeOnline(self, src_node)
6800 result = self.rpc.call_export_info(src_node, src_path)
6801 result.Raise("No export or invalid export found in dir %s" % src_path)
6803 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6804 if not export_info.has_section(constants.INISECT_EXP):
6805 raise errors.ProgrammerError("Corrupted export config",
6806 errors.ECODE_ENVIRON)
6808 ei_version = export_info.get(constants.INISECT_EXP, "version")
6809 if (int(ei_version) != constants.EXPORT_VERSION):
6810 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6811 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info
6815 def _ReadExportParams(self, einfo):
6816 """Use export parameters as defaults.
6818 In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    the export carries them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6825 if self.op.disk_template is None:
6826 if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)
6834 if not self.op.disks:
6835 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6837 # TODO: import the disk iv_name too
6838 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6839 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6840 disks.append({"size": disk_sz})
6841 self.op.disks = disks
6843 raise errors.OpPrereqError("No disk info specified and the export"
6844 " is missing the disk information",
6847 if (not self.op.nics and
6848 einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics
6858 if (self.op.hypervisor is None and
6859 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6860 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6861 if einfo.has_section(constants.INISECT_HYP):
6862 # use the export parameters but do not override the ones
6863 # specified by the user
6864 for name, value in einfo.items(constants.INISECT_HYP):
6865 if name not in self.op.hvparams:
6866 self.op.hvparams[name] = value
6868 if einfo.has_section(constants.INISECT_BEP):
6869 # use the parameters, without overriding
6870 for name, value in einfo.items(constants.INISECT_BEP):
6871 if name not in self.op.beparams:
6872 self.op.beparams[name] = value
6874 # try to read the parameters old style, from the main section
6875 for name in constants.BES_PARAMETERS:
6876 if (name not in self.op.beparams and
6877 einfo.has_option(constants.INISECT_INS, name)):
6878 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6880 if einfo.has_section(constants.INISECT_OSP):
6881 # use the parameters, without overriding
6882 for name, value in einfo.items(constants.INISECT_OSP):
6883 if name not in self.op.osparams:
6884 self.op.osparams[name] = value
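    # Illustrative sketch of an export file as read above (section and
    # option names follow the INISECT_* constants; the values are made up):
    #
    #   [export]
    #   version = 0
    #   os = debian-image
    #
    #   [instance]
    #   disk_count = 1
    #   disk0_size = 10240
    #   nic_count = 1
    #   nic0_mac = aa:00:00:fa:3a:3f
    #
    # Options already set in the opcode always win over these values.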
6886 def _RevertToDefaults(self, cluster):
6887 """Revert the instance parameters to the default values.
6891 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6892 for name in self.op.hvparams.keys():
6893 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6894 del self.op.hvparams[name]
6896 be_defs = cluster.SimpleFillBE({})
6897 for name in self.op.beparams.keys():
6898 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6899 del self.op.beparams[name]
6901 nic_defs = cluster.SimpleFillNIC({})
6902 for nic in self.op.nics:
6903 for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6908 for name in self.op.osparams.keys():
6909 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6910 del self.op.osparams[name]
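    # Worked example with illustrative values: if the cluster-wide default
    # for the "memory" backend parameter is 128 and the opcode also says
    # {"memory": 128, "vcpus": 4}, the loops above drop "memory" (it
    # matches the default) and keep "vcpus", so only genuinely
    # instance-specific settings end up stored in the configuration.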
6912 def CheckPrereq(self):
6913 """Check prerequisites.
6916 if self.op.mode == constants.INSTANCE_IMPORT:
6917 export_info = self._ReadExportInfo()
6918 self._ReadExportParams(export_info)
6920 _CheckDiskTemplate(self.op.disk_template)
6922 if (not self.cfg.GetVGName() and
6923 self.op.disk_template not in constants.DTS_NOT_LVM):
6924 raise errors.OpPrereqError("Cluster does not support lvm-based"
6925 " instances", errors.ECODE_STATE)
6927 if self.op.hypervisor is None:
6928 self.op.hypervisor = self.cfg.GetHypervisorType()
6930 cluster = self.cfg.GetClusterInfo()
6931 enabled_hvs = cluster.enabled_hypervisors
6932 if self.op.hypervisor not in enabled_hvs:
6933 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6934 " cluster (%s)" % (self.op.hypervisor,
6935 ",".join(enabled_hvs)),
6938 # check hypervisor parameter syntax (locally)
6939 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6940 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6942 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6943 hv_type.CheckParameterSyntax(filled_hvp)
6944 self.hv_full = filled_hvp
6945 # check that we don't specify global parameters on an instance
6946 _CheckGlobalHvParams(self.op.hvparams)
6948 # fill and remember the beparams dict
6949 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6950 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6952 # build os parameters
6953 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6955 # now that hvp/bep are in final format, let's reset to defaults,
6957 if self.op.identify_defaults:
6958 self._RevertToDefaults(cluster)
6962 for idx, nic in enumerate(self.op.nics):
6963 nic_mode_req = nic.get("mode", None)
6964 nic_mode = nic_mode_req
6965 if nic_mode is None:
6966 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6968 # in routed mode, for the first nic, the default ip is 'auto'
6969 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6970 default_ip_mode = constants.VALUE_AUTO
6972 default_ip_mode = constants.VALUE_NONE
6974 # ip validity checks
6975 ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IsValidIP4(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip
6991 # TODO: check the ip address for uniqueness
6992 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6993 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6996 # MAC address verification
6997 mac = nic.get("mac", constants.VALUE_AUTO)
6998 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7003 except errors.ReservationError:
7004 raise errors.OpPrereqError("MAC address %s already in use"
7005 " in cluster" % mac,
7006 errors.ECODE_NOTUNIQUE)
7008 # bridge verification
7009 bridge = nic.get("bridge", None)
7010 link = nic.get("link", None)
7012 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7013 " at the same time", errors.ECODE_INVAL)
7014 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7015 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7022 nicparams[constants.NIC_MODE] = nic_mode_req
7024 nicparams[constants.NIC_LINK] = link
7026 check_params = cluster.SimpleFillNIC(nicparams)
7027 objects.NIC.CheckParameterSyntax(check_params)
7028 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)
7050 if self.op.mode == constants.INSTANCE_IMPORT:
7052 # Check that the new instance doesn't have less disks than the export
7053 instance_disks = len(self.disks)
7054 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7055 if instance_disks < export_disks:
7056 raise errors.OpPrereqError("Not enough disks to import."
7057 " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)
7072 self.src_images = disk_images
      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_INVAL)
7081 if self.op.instance_name == old_name:
7082 for idx, nic in enumerate(self.nics):
7083 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7084 nic_mac_ini = 'nic%d_mac' % idx
7085 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7087 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7089 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7090 if self.op.ip_check:
7091 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7092 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7093 (self.check_ip, self.op.instance_name),
7094 errors.ECODE_NOTUNIQUE)
7096 #### mac address generation
7097 # By generating here the mac address both the allocator and the hooks get
7098 # the real final mac address rather than the 'auto' or 'generate' value.
7099 # There is a race condition between the generation and the instance object
7100 # creation, which means that we know the mac is valid now, but we're not
7101 # sure it will be when we actually add the instance. If things go bad
7102 # adding the instance will abort because of a duplicate mac, and the
7103 # creation job will fail.
7104 for nic in self.nics:
7105 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7106 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7110 if self.op.iallocator is not None:
7111 self._RunAllocator()
7113 #### node related checks
7115 # check primary node
7116 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7117 assert self.pnode is not None, \
7118 "Cannot retrieve locked node %s" % self.op.pnode
7120 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7121 pnode.name, errors.ECODE_STATE)
7123 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7124 pnode.name, errors.ECODE_STATE)
7126 self.secondaries = []
7128 # mirror node verification
7129 if self.op.disk_template in constants.DTS_NET_MIRROR:
7130 if self.op.snode is None:
7131 raise errors.OpPrereqError("The networked disk templates need"
7132 " a mirror node", errors.ECODE_INVAL)
7133 if self.op.snode == pnode.name:
7134 raise errors.OpPrereqError("The secondary node cannot be the"
7135 " primary node.", errors.ECODE_INVAL)
7136 _CheckNodeOnline(self, self.op.snode)
7137 _CheckNodeNotDrained(self, self.op.snode)
7138 self.secondaries.append(self.op.snode)
7140 nodenames = [pnode.name] + self.secondaries
    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)
7145 # Check lv size requirements, if not adopting
7146 if req_size is not None and not self.adopt_disks:
7147 _CheckNodesFreeDisk(self, nodenames, req_size)
7149 if self.adopt_disks: # instead, we must check the adoption data
7150 all_lvs = set([i["adopt"] for i in self.disks])
7151 if len(all_lvs) != len(self.disks):
7152 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7154 for lv_name in all_lvs:
7156 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7157 except errors.ReservationError:
7158 raise errors.OpPrereqError("LV named %s used by another instance" %
7159 lv_name, errors.ECODE_NOTUNIQUE)
7161 node_lvs = self.rpc.call_lv_list([pnode.name],
7162 self.cfg.GetVGName())[pnode.name]
7163 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7164 node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
7175 # update the size of disk based on what is found
7176 for dsk in self.disks:
7177 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7179 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7181 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7182 # check OS parameters (remotely)
7183 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7185 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)
7196 def Exec(self, feedback_fn):
7197 """Create and add the instance to the cluster.
7200 instance = self.op.instance_name
7201 pnode_name = self.pnode.name
7203 ht_kind = self.op.hypervisor
7204 if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None
7209 if constants.ENABLE_FILE_STORAGE:
7210 # this is needed because os.path.join does not accept None arguments
7211 if self.op.file_storage_dir is None:
7212 string_file_storage_dir = ""
7214 string_file_storage_dir = self.op.file_storage_dir
7216 # build the full file storage dir path
7217 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7218 string_file_storage_dir, instance)
7220 file_storage_dir = ""
7222 disks = _GenerateDiskTemplate(self,
7223 self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)
7231 iobj = objects.Instance(name=instance, os=self.op.os_type,
7232 primary_node=pnode_name,
7233 nics=self.nics, disks=disks,
7234 disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
7237 beparams=self.op.beparams,
7238 hvparams=self.op.hvparams,
7239 hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )
7243 if self.adopt_disks:
7244 # rename LVs to the newly-generated names; we need to construct
7245 # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7249 rename_to.append(t_dsk.logical_id)
7250 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7251 self.cfg.SetDiskID(t_dsk, pnode_name)
7252 result = self.rpc.call_blockdev_rename(pnode_name,
7253 zip(tmp_disks, rename_to))
7254 result.Raise("Failed to rename adoped LVs")
7256 feedback_fn("* creating instance disks...")
7258 _CreateDisks(self, iobj)
7259 except errors.OpExecError:
7260 self.LogWarning("Device creation failed, reverting...")
7262 _RemoveDisks(self, iobj)
7264 self.cfg.ReleaseDRBDMinors(instance)
7267 feedback_fn("adding instance %s to cluster config" % instance)
7269 self.cfg.AddInstance(iobj, self.proc.GetECId())
7271 # Declare that we don't want to remove the instance lock anymore, as we've
7272 # added the instance to the config
7273 del self.remove_locks[locking.LEVEL_INSTANCE]
7274 # Unlock all the nodes
7275 if self.op.mode == constants.INSTANCE_IMPORT:
7276 nodes_keep = [self.op.src_node]
7277 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7278 if node != self.op.src_node]
7279 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7280 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7282 self.context.glm.release(locking.LEVEL_NODE)
7283 del self.acquired_locks[locking.LEVEL_NODE]
7285 if self.op.wait_for_sync:
7286 disk_abort = not _WaitForSync(self, iobj)
7287 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7288 # make sure the disks are not degraded (still sync-ing is ok)
7290 feedback_fn("* checking mirrors status")
7291 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7296 _RemoveDisks(self, iobj)
7297 self.cfg.RemoveInstance(iobj.name)
7298 # Make sure the instance lock gets removed
7299 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7300 raise errors.OpExecError("There are some degraded disks for"
7303 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7304 if self.op.mode == constants.INSTANCE_CREATE:
7305 if not self.op.no_install:
7306 feedback_fn("* running the instance OS create scripts...")
7307 # FIXME: pass debug option from opcode to backend
7308 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7309 self.op.debug_level)
7310 result.Raise("Could not add os for instance %s"
7311 " on node %s" % (instance, pnode_name))
7313 elif self.op.mode == constants.INSTANCE_IMPORT:
7314 feedback_fn("* running the instance OS import scripts...")
        transfers = []
        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
7335 if not compat.all(import_result):
7336 self.LogWarning("Some disks for instance %s on node %s were not"
7337 " imported successfully" % (instance, pnode_name))
7339 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7340 feedback_fn("* preparing remote import...")
7341 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7342 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7344 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7345 self.source_x509_ca,
7346 self._cds, timeouts)
7347 if not compat.all(disk_results):
7348 # TODO: Should the instance still be started, even if some disks
7349 # failed to import (valid for local imports, too)?
7350 self.LogWarning("Some disks for instance %s on node %s were not"
7351 " imported successfully" % (instance, pnode_name))
7353 # Run rename script on newly imported instance
7354 assert iobj.name == instance
7355 feedback_fn("Running rename script for %s" % instance)
7356 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7357 self.source_instance_name,
7358 self.op.debug_level)
7360 self.LogWarning("Failed to run rename script for %s on node"
7361 " %s: %s" % (instance, pnode_name, result.fail_msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
7370 self.cfg.Update(iobj, feedback_fn)
7371 logging.info("Starting instance %s on node %s", instance, pnode_name)
7372 feedback_fn("* starting instance...")
7373 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7374 result.Raise("Could not start instance")
7376 return list(iobj.all_nodes)
7379 class LUConnectConsole(NoHooksLU):
7380 """Connect to an instance's console.
7382 This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False
7392 def ExpandNames(self):
7393 self._ExpandAndLockInstance()
7395 def CheckPrereq(self):
7396 """Check prerequisites.
7398 This checks that the instance is in the cluster.
7401 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7402 assert self.instance is not None, \
7403 "Cannot retrieve locked instance %s" % self.op.instance_name
7404 _CheckNodeOnline(self, self.instance.primary_node)
7406 def Exec(self, feedback_fn):
7407 """Connect to the console of an instance
7410 instance = self.instance
7411 node = instance.primary_node
7413 node_insts = self.rpc.call_instance_list([node],
7414 [instance.hypervisor])[node]
7415 node_insts.Raise("Can't get node information from %s" % node)
7417 if instance.name not in node_insts.payload:
7418 raise errors.OpExecError("Instance %s is not running." % instance.name)
7420 logging.debug("Connecting to console of %s on %s", instance.name, node)
7422 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7423 cluster = self.cfg.GetClusterInfo()
7424 # beparams and hvparams are passed separately, to avoid editing the
7425 # instance and then saving the defaults in the instance itself.
7426 hvparams = cluster.FillHV(instance)
7427 beparams = cluster.FillBE(instance)
7428 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7431 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
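    # Illustrative result (host name and hypervisor command are made up):
    # for a Xen instance this typically expands to something like
    #   ssh -t root@node1.example.com "xm console web1.example.com"
    # i.e. an SSH invocation of the hypervisor's own console command on
    # the instance's primary node.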
7434 class LUReplaceDisks(LogicalUnit):
7435 """Replace the disks of an instance.
7438 HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
7442 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7443 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7444 ("remote_node", None, _TMaybeString),
7445 ("iallocator", None, _TMaybeString),
7446 ("early_release", False, _TBool),
7450 def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)
7454 def ExpandNames(self):
7455 self._ExpandAndLockInstance()
7457 if self.op.iallocator is not None:
7458 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7460 elif self.op.remote_node is not None:
7461 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7462 self.op.remote_node = remote_node
7464 # Warning: do not remove the locking of the new secondary here
7465 # unless DRBD8.AddChildren is changed to work in parallel;
7466 # currently it doesn't since parallel invocations of
7467 # FindUnusedMinor will conflict
7468 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7469 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
7473 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7475 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7476 self.op.iallocator, self.op.remote_node,
7477 self.op.disks, False, self.op.early_release)
7479 self.tasklets = [self.replacer]
7481 def DeclareLocks(self, level):
7482 # If we're not already locking all nodes in the set we have to declare the
7483 # instance's primary/secondary nodes.
7484 if (level == locking.LEVEL_NODE and
7485 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7486 self._LockInstancesNodes()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl
7510 class TLReplaceDisks(Tasklet):
7511 """Replaces disks for an instance.
7513 Note: Locking is not within the scope of this class.
7516 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7517 disks, delay_iallocator, early_release):
7518 """Initializes this class.
7521 Tasklet.__init__(self, lu)
7524 self.instance_name = instance_name
7526 self.iallocator_name = iallocator_name
7527 self.remote_node = remote_node
7529 self.delay_iallocator = delay_iallocator
7530 self.early_release = early_release
7533 self.instance = None
7534 self.new_node = None
7535 self.target_node = None
7536 self.other_node = None
7537 self.remote_node_info = None
7538 self.node_secondary_ip = None
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
7545 # check for valid parameter combination
7546 if mode == constants.REPLACE_DISK_CHG:
7547 if remote_node is None and iallocator is None:
7548 raise errors.OpPrereqError("When changing the secondary either an"
7549 " iallocator script must be used or the"
7550 " new node given", errors.ECODE_INVAL)
7552 if remote_node is not None and iallocator is not None:
7553 raise errors.OpPrereqError("Give either the iallocator or the new"
7554 " secondary, not both", errors.ECODE_INVAL)
7556 elif remote_node is not None or iallocator is not None:
7557 # Not replacing the secondary
7558 raise errors.OpPrereqError("The iallocator and new node options can"
7559 " only be used when changing the"
7560 " secondary node", errors.ECODE_INVAL)
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
7575 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7576 " %s" % (iallocator_name, ial.info),
7579 if len(ial.result) != ial.required_nodes:
7580 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7581 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)
7586 remote_node_name = ial.result[0]
7588 lu.LogInfo("Selected new secondary for instance '%s': %s",
7589 instance_name, remote_node_name)
7591 return remote_node_name
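    # Illustrative sketch: in relocation mode the allocator returns a
    # single node name, e.g.
    #
    #   new_node = TLReplaceDisks._RunAllocator(lu, "hail",
    #                                           "web1.example.com",
    #                                           ["node2.example.com"])
    #
    # "hail" and the host names are hypothetical; required_nodes is 1 for
    # IALLOCATOR_MODE_RELOC, hence the single-element result list.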
7593 def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)
7597 def CheckPrereq(self):
7598 """Check prerequisites.
7600 This checks that the instance is in the cluster.
7603 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7604 assert instance is not None, \
7605 "Cannot retrieve locked instance %s" % self.instance_name
7607 if instance.disk_template != constants.DT_DRBD8:
7608 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7609 " instances", errors.ECODE_INVAL)
7611 if len(instance.secondary_nodes) != 1:
7612 raise errors.OpPrereqError("The instance has a strange layout,"
7613 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)
7617 if not self.delay_iallocator:
7618 self._CheckPrereq2()
7620 def _CheckPrereq2(self):
7621 """Check prerequisites, second part.
7623 This function should always be part of CheckPrereq. It was separated and is
7624 now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
7629 instance = self.instance
7630 secondary_node = instance.secondary_nodes[0]
7632 if self.iallocator_name is None:
7633 remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)
7638 if remote_node is not None:
7639 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7640 assert self.remote_node_info is not None, \
7641 "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None
7645 if remote_node == self.instance.primary_node:
7646 raise errors.OpPrereqError("The specified node is the primary node of"
7647 " the instance.", errors.ECODE_INVAL)
7649 if remote_node == secondary_node:
7650 raise errors.OpPrereqError("The specified node is already the"
7651 " secondary node of the instance.",
7654 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7655 constants.REPLACE_DISK_CHG):
7656 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7659 if self.mode == constants.REPLACE_DISK_AUTO:
7660 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7661 faulty_secondary = self._FindFaultyDisks(secondary_node)
7663 if faulty_primary and faulty_secondary:
7664 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7665 " one node and cannot be repaired"
7666 " automatically" % self.instance_name,
7670 self.disks = faulty_primary
7671 self.target_node = instance.primary_node
7672 self.other_node = secondary_node
7673 check_nodes = [self.target_node, self.other_node]
7674 elif faulty_secondary:
7675 self.disks = faulty_secondary
7676 self.target_node = secondary_node
7677 self.other_node = instance.primary_node
7678 check_nodes = [self.target_node, self.other_node]
7684 # Non-automatic modes
7685 if self.mode == constants.REPLACE_DISK_PRI:
7686 self.target_node = instance.primary_node
7687 self.other_node = secondary_node
7688 check_nodes = [self.target_node, self.other_node]
7690 elif self.mode == constants.REPLACE_DISK_SEC:
7691 self.target_node = secondary_node
7692 self.other_node = instance.primary_node
7693 check_nodes = [self.target_node, self.other_node]
7695 elif self.mode == constants.REPLACE_DISK_CHG:
7696 self.new_node = remote_node
7697 self.other_node = instance.primary_node
7698 self.target_node = secondary_node
7699 check_nodes = [self.new_node, self.other_node]
7701 _CheckNodeNotDrained(self.lu, remote_node)
7703 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7704 assert old_node_info is not None
7705 if old_node_info.offline and not self.early_release:
7706 # doesn't make sense to delay the release
7707 self.early_release = True
7708 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7709 " early-release mode", secondary_node)
7712 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7715 # If not specified all disks should be replaced
7717 self.disks = range(len(self.instance.disks))
7719 for node in check_nodes:
7720 _CheckNodeOnline(self.lu, node)
7722 # Check whether disks are valid
7723 for disk_idx in self.disks:
7724 instance.FindDisk(disk_idx)
7726 # Get secondary node IP addresses
7729 for node_name in [self.target_node, self.other_node, self.new_node]:
7730 if node_name is not None:
7731 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7733 self.node_secondary_ip = node_2nd_ip
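# Illustrative shape of the resulting map (addresses are hypothetical):
#   {"node1.example.com": "192.0.2.1", "node2.example.com": "192.0.2.2"}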
7735 def Exec(self, feedback_fn):
7736 """Execute disk replacement.
7738 This dispatches the disk replacement to the appropriate handler.
7741 if self.delay_iallocator:
7742 self._CheckPrereq2()
7745 feedback_fn("No disks need replacement")
7748 feedback_fn("Replacing disk(s) %s for %s" %
7749 (utils.CommaJoin(self.disks), self.instance.name))
7751 activate_disks = (not self.instance.admin_up)
7753 # Activate the instance disks if we're replacing them on a down instance
7755 _StartInstanceDisks(self.lu, self.instance, True)
7758 # Should we replace the secondary node?
7759 if self.new_node is not None:
7760 fn = self._ExecDrbd8Secondary
7762 fn = self._ExecDrbd8DiskOnly
7764 return fn(feedback_fn)
7767 # Deactivate the instance disks if we're replacing them on a
7770 _SafeShutdownInstanceDisks(self.lu, self.instance)
7772 def _CheckVolumeGroup(self, nodes):
7773 self.lu.LogInfo("Checking volume groups")
7775 vgname = self.cfg.GetVGName()
7777 # Make sure volume group exists on all involved nodes
7778 results = self.rpc.call_vg_list(nodes)
7780 raise errors.OpExecError("Can't list volume groups on the nodes")
7784 res.Raise("Error checking node %s" % node)
7785 if vgname not in res.payload:
7786 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7789 def _CheckDisksExistence(self, nodes):
7790 # Check disk existence
7791 for idx, dev in enumerate(self.instance.disks):
7792 if idx not in self.disks:
7796 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7797 self.cfg.SetDiskID(dev, node)
7799 result = self.rpc.call_blockdev_find(node, dev)
7801 msg = result.fail_msg
7802 if msg or not result.payload:
7804 msg = "disk not found"
7805 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7808 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7809 for idx, dev in enumerate(self.instance.disks):
7810 if idx not in self.disks:
7813 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7816 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7818 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7819 " replace disks for instance %s" %
7820 (node_name, self.instance.name))
7822 def _CreateNewStorage(self, node_name):
7823 vgname = self.cfg.GetVGName()
7826 for idx, dev in enumerate(self.instance.disks):
7827 if idx not in self.disks:
7830 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7832 self.cfg.SetDiskID(dev, node_name)
7834 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7835 names = _GenerateUniqueNames(self.lu, lv_names)
7837 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7838 logical_id=(vgname, names[0]))
7839 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7840 logical_id=(vgname, names[1]))
7842 new_lvs = [lv_data, lv_meta]
7843 old_lvs = dev.children
7844 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7846 # we pass force_create=True to force the LVM creation
7847 for new_lv in new_lvs:
7848 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7849 _GetInstanceInfoText(self.instance), False)
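# At this point iv_names maps each DRBD device to its old and new LV
# pairs, e.g. (illustrative):
#   {"disk/0": (drbd_dev, [old_data_lv, old_meta_lv], [lv_data, lv_meta])}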
7853 def _CheckDevices(self, node_name, iv_names):
7854 for name, (dev, _, _) in iv_names.iteritems():
7855 self.cfg.SetDiskID(dev, node_name)
7857 result = self.rpc.call_blockdev_find(node_name, dev)
7859 msg = result.fail_msg
7860 if msg or not result.payload:
7862 msg = "disk not found"
7863 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7866 if result.payload.is_degraded:
7867 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7869 def _RemoveOldStorage(self, node_name, iv_names):
7870 for name, (_, old_lvs, _) in iv_names.iteritems():
7871 self.lu.LogInfo("Remove logical volumes for %s" % name)
7874 self.cfg.SetDiskID(lv, node_name)
7876 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7878 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7879 hint="remove unused LVs manually")
7881 def _ReleaseNodeLock(self, node_name):
7882 """Releases the lock for a given node."""
7883 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7885 def _ExecDrbd8DiskOnly(self, feedback_fn):
7886 """Replace a disk on the primary or secondary for DRBD 8.
7888 The algorithm for replace is quite complicated:
7890 1. for each disk to be replaced:
7892 1. create new LVs on the target node with unique names
7893 1. detach old LVs from the drbd device
7894 1. rename old LVs to name_replaced.<time_t>
7895 1. rename new LVs to old LVs
7896 1. attach the new LVs (with the old names now) to the drbd device
7898 1. wait for sync across all devices
7900 1. for each modified disk:
7902 1. remove old LVs (which have the name name_replaced.<time_t>)
7904 Failures are not very well handled.
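As an illustration of the naming above: a data LV whose name ends in
.disk0_data is renamed by appending _replaced-<time_t>, while the newly
created LV takes over the original name.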
7909 # Step: check device activation
7910 self.lu.LogStep(1, steps_total, "Check device existence")
7911 self._CheckDisksExistence([self.other_node, self.target_node])
7912 self._CheckVolumeGroup([self.target_node, self.other_node])
7914 # Step: check other node consistency
7915 self.lu.LogStep(2, steps_total, "Check peer consistency")
7916 self._CheckDisksConsistency(self.other_node,
7917 self.other_node == self.instance.primary_node,
7920 # Step: create new storage
7921 self.lu.LogStep(3, steps_total, "Allocate new storage")
7922 iv_names = self._CreateNewStorage(self.target_node)
7924 # Step: for each lv, detach+rename*2+attach
7925 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7926 for dev, old_lvs, new_lvs in iv_names.itervalues():
7927 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7929 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7931 result.Raise("Can't detach drbd from local storage on node"
7932 " %s for device %s" % (self.target_node, dev.iv_name))
7934 #cfg.Update(instance)
7936 # ok, we created the new LVs, so now we know we have the needed
7937 # storage; as such, we proceed on the target node to rename
7938 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7939 # using the assumption that logical_id == physical_id (which in
7940 # turn is the unique_id on that node)
7942 # FIXME(iustin): use a better name for the replaced LVs
7943 temp_suffix = int(time.time())
7944 ren_fn = lambda d, suff: (d.physical_id[0],
7945 d.physical_id[1] + "_replaced-%s" % suff)
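# Illustrative rename performed by ren_fn (VG/LV names hypothetical):
#   ("xenvg", "aabbccdd.disk0_data")
#     -> ("xenvg", "aabbccdd.disk0_data_replaced-1300000000")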
7947 # Build the rename list based on what LVs exist on the node
7948 rename_old_to_new = []
7949 for to_ren in old_lvs:
7950 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7951 if not result.fail_msg and result.payload:
7953 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7955 self.lu.LogInfo("Renaming the old LVs on the target node")
7956 result = self.rpc.call_blockdev_rename(self.target_node,
7958 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7960 # Now we rename the new LVs to the old LVs
7961 self.lu.LogInfo("Renaming the new LVs on the target node")
7962 rename_new_to_old = [(new, old.physical_id)
7963 for old, new in zip(old_lvs, new_lvs)]
7964 result = self.rpc.call_blockdev_rename(self.target_node,
7966 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7968 for old, new in zip(old_lvs, new_lvs):
7969 new.logical_id = old.logical_id
7970 self.cfg.SetDiskID(new, self.target_node)
7972 for disk in old_lvs:
7973 disk.logical_id = ren_fn(disk, temp_suffix)
7974 self.cfg.SetDiskID(disk, self.target_node)
7976 # Now that the new lvs have the old name, we can add them to the device
7977 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7978 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7980 msg = result.fail_msg
7982 for new_lv in new_lvs:
7983 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7986 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7987 hint=("cleanup manually the unused logical"
7989 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7991 dev.children = new_lvs
7993 self.cfg.Update(self.instance, feedback_fn)
7996 if self.early_release:
7997 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7999 self._RemoveOldStorage(self.target_node, iv_names)
8000 # WARNING: we release both node locks here, do not do other RPCs
8001 # than WaitForSync to the primary node
8002 self._ReleaseNodeLock([self.target_node, self.other_node])
8005 # This can fail as the old devices are degraded and _WaitForSync
8006 # does a combined result over all disks, so we don't check its return value
8007 self.lu.LogStep(cstep, steps_total, "Sync devices")
8009 _WaitForSync(self.lu, self.instance)
8011 # Check all devices manually
8012 self._CheckDevices(self.instance.primary_node, iv_names)
8014 # Step: remove old storage
8015 if not self.early_release:
8016 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8018 self._RemoveOldStorage(self.target_node, iv_names)
8020 def _ExecDrbd8Secondary(self, feedback_fn):
8021 """Replace the secondary node for DRBD 8.
8023 The algorithm for replace is quite complicated:
8024 - for all disks of the instance:
8025 - create new LVs on the new node with same names
8026 - shutdown the drbd device on the old secondary
8027 - disconnect the drbd network on the primary
8028 - create the drbd device on the new secondary
8029 - network attach the drbd on the primary, using an artifice:
8030 the drbd code for Attach() will connect to the network if it
8031 finds a device which is connected to the good local disks but not network enabled
8033 - wait for sync across all devices
8034 - remove all disks from the old secondary
8036 Failures are not very well handled.
8041 # Step: check device activation
8042 self.lu.LogStep(1, steps_total, "Check device existence")
8043 self._CheckDisksExistence([self.instance.primary_node])
8044 self._CheckVolumeGroup([self.instance.primary_node])
8046 # Step: check other node consistency
8047 self.lu.LogStep(2, steps_total, "Check peer consistency")
8048 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8050 # Step: create new storage
8051 self.lu.LogStep(3, steps_total, "Allocate new storage")
8052 for idx, dev in enumerate(self.instance.disks):
8053 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8054 (self.new_node, idx))
8055 # we pass force_create=True to force LVM creation
8056 for new_lv in dev.children:
8057 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8058 _GetInstanceInfoText(self.instance), False)
8060 # Step 4: drbd minors and drbd setup changes
8061 # after this, we must manually remove the drbd minors on both the
8062 # error and the success paths
8063 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8064 minors = self.cfg.AllocateDRBDMinor([self.new_node
8065 for dev in self.instance.disks],
8067 logging.debug("Allocated minors %r", minors)
8070 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8071 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8072 (self.new_node, idx))
8073 # create new devices on new_node; note that we create two IDs:
8074 # one without port, so the drbd will be activated without
8075 # networking information on the new node at this stage, and one
8076 # with network, for the later activation in step 4
8077 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8078 if self.instance.primary_node == o_node1:
8081 assert self.instance.primary_node == o_node2, "Three-node instance?"
8084 new_alone_id = (self.instance.primary_node, self.new_node, None,
8085 p_minor, new_minor, o_secret)
8086 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8087 p_minor, new_minor, o_secret)
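# Illustrative logical_id rewrite (values hypothetical), with node1 as
# primary, node3 as the new secondary and new_minor == 2:
#   old:       ("node1", "node2", 11000, 0, 1, "secret")
#   new_alone: ("node1", "node3", None,  0, 2, "secret")
#   new_net:   ("node1", "node3", 11000, 0, 2, "secret")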
8089 iv_names[idx] = (dev, dev.children, new_net_id)
8090 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8092 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8093 logical_id=new_alone_id,
8094 children=dev.children,
8097 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8098 _GetInstanceInfoText(self.instance), False)
8099 except errors.GenericError:
8100 self.cfg.ReleaseDRBDMinors(self.instance.name)
8103 # We have new devices, shutdown the drbd on the old secondary
8104 for idx, dev in enumerate(self.instance.disks):
8105 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8106 self.cfg.SetDiskID(dev, self.target_node)
8107 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8109 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8110 " node: %s" % (idx, msg),
8111 hint=("Please cleanup this device manually as"
8112 " soon as possible"))
8114 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8115 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8116 self.node_secondary_ip,
8117 self.instance.disks)\
8118 [self.instance.primary_node]
8120 msg = result.fail_msg
8122 # detaches didn't succeed (unlikely)
8123 self.cfg.ReleaseDRBDMinors(self.instance.name)
8124 raise errors.OpExecError("Can't detach the disks from the network on"
8125 " old node: %s" % (msg,))
8127 # if we managed to detach at least one, we update all the disks of
8128 # the instance to point to the new secondary
8129 self.lu.LogInfo("Updating instance configuration")
8130 for dev, _, new_logical_id in iv_names.itervalues():
8131 dev.logical_id = new_logical_id
8132 self.cfg.SetDiskID(dev, self.instance.primary_node)
8134 self.cfg.Update(self.instance, feedback_fn)
8136 # and now perform the drbd attach
8137 self.lu.LogInfo("Attaching primary drbds to new secondary"
8138 " (standalone => connected)")
8139 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8141 self.node_secondary_ip,
8142 self.instance.disks,
8145 for to_node, to_result in result.items():
8146 msg = to_result.fail_msg
8148 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8150 hint=("please do a gnt-instance info to see the"
8151 " status of disks"))
8153 if self.early_release:
8154 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8156 self._RemoveOldStorage(self.target_node, iv_names)
8157 # WARNING: we release all node locks here, do not do other RPCs
8158 # than WaitForSync to the primary node
8159 self._ReleaseNodeLock([self.instance.primary_node,
8164 # This can fail as the old devices are degraded and _WaitForSync
8165 # does a combined result over all disks, so we don't check its return value
8166 self.lu.LogStep(cstep, steps_total, "Sync devices")
8168 _WaitForSync(self.lu, self.instance)
8170 # Check all devices manually
8171 self._CheckDevices(self.instance.primary_node, iv_names)
8173 # Step: remove old storage
8174 if not self.early_release:
8175 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8176 self._RemoveOldStorage(self.target_node, iv_names)
8179 class LURepairNodeStorage(NoHooksLU):
8180 """Repairs the volume group on a node.
8185 ("storage_type", _NoDefault, _CheckStorageType),
8186 ("name", _NoDefault, _TNonEmptyString),
8187 ("ignore_consistency", False, _TBool),
8191 def CheckArguments(self):
8192 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8194 storage_type = self.op.storage_type
8196 if (constants.SO_FIX_CONSISTENCY not in
8197 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8198 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
8199 " repaired" % storage_type,
8202 def ExpandNames(self):
8203 self.needed_locks = {
8204 locking.LEVEL_NODE: [self.op.node_name],
8207 def _CheckFaultyDisks(self, instance, node_name):
8208 """Ensure faulty disks abort the opcode or at least warn."""
8210 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8212 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8213 " node '%s'" % (instance.name, node_name),
8215 except errors.OpPrereqError, err:
8216 if self.op.ignore_consistency:
8217 self.proc.LogWarning(str(err.args[0]))
8221 def CheckPrereq(self):
8222 """Check prerequisites.
8225 # Check whether any instance on this node has faulty disks
8226 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8227 if not inst.admin_up:
8229 check_nodes = set(inst.all_nodes)
8230 check_nodes.discard(self.op.node_name)
8231 for inst_node_name in check_nodes:
8232 self._CheckFaultyDisks(inst, inst_node_name)
8234 def Exec(self, feedback_fn):
8235 feedback_fn("Repairing storage unit '%s' on %s ..." %
8236 (self.op.name, self.op.node_name))
8238 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8239 result = self.rpc.call_storage_execute(self.op.node_name,
8240 self.op.storage_type, st_args,
8242 constants.SO_FIX_CONSISTENCY)
8243 result.Raise("Failed to repair storage unit '%s' on %s" %
8244 (self.op.name, self.op.node_name))
8247 class LUNodeEvacuationStrategy(NoHooksLU):
8248 """Computes the node evacuation strategy.
8252 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8253 ("remote_node", None, _TMaybeString),
8254 ("iallocator", None, _TMaybeString),
8258 def CheckArguments(self):
8259 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8261 def ExpandNames(self):
8262 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8263 self.needed_locks = locks = {}
8264 if self.op.remote_node is None:
8265 locks[locking.LEVEL_NODE] = locking.ALL_SET
8267 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8268 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8270 def Exec(self, feedback_fn):
8271 if self.op.remote_node is not None:
8273 for node in self.op.nodes:
8274 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8277 if i.primary_node == self.op.remote_node:
8278 raise errors.OpPrereqError("Node %s is the primary node of"
8279 " instance %s, cannot use it as"
8281 (self.op.remote_node, i.name),
8283 result.append([i.name, self.op.remote_node])
8285 ial = IAllocator(self.cfg, self.rpc,
8286 mode=constants.IALLOCATOR_MODE_MEVAC,
8287 evac_nodes=self.op.nodes)
8288 ial.Run(self.op.iallocator, validate=True)
8290 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8296 class LUGrowDisk(LogicalUnit):
8297 """Grow a disk of an instance.
8301 HTYPE = constants.HTYPE_INSTANCE
8304 ("disk", _NoDefault, _TInt),
8305 ("amount", _NoDefault, _TInt),
8306 ("wait_for_sync", True, _TBool),
8310 def ExpandNames(self):
8311 self._ExpandAndLockInstance()
8312 self.needed_locks[locking.LEVEL_NODE] = []
8313 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8315 def DeclareLocks(self, level):
8316 if level == locking.LEVEL_NODE:
8317 self._LockInstancesNodes()
8319 def BuildHooksEnv(self):
8322 This runs on the master, the primary and all the secondaries.
8326 "DISK": self.op.disk,
8327 "AMOUNT": self.op.amount,
8329 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8330 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8333 def CheckPrereq(self):
8334 """Check prerequisites.
8336 This checks that the instance is in the cluster.
8339 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8340 assert instance is not None, \
8341 "Cannot retrieve locked instance %s" % self.op.instance_name
8342 nodenames = list(instance.all_nodes)
8343 for node in nodenames:
8344 _CheckNodeOnline(self, node)
8346 self.instance = instance
8348 if instance.disk_template not in constants.DTS_GROWABLE:
8349 raise errors.OpPrereqError("Instance's disk layout does not support"
8350 " growing.", errors.ECODE_INVAL)
8352 self.disk = instance.FindDisk(self.op.disk)
8354 if instance.disk_template != constants.DT_FILE:
8355 # TODO: check the free disk space for file, when that feature will be implemented
8357 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8359 def Exec(self, feedback_fn):
8360 """Execute disk grow.
8363 instance = self.instance
8366 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8368 raise errors.OpExecError("Cannot activate block device to grow")
8370 for node in instance.all_nodes:
8371 self.cfg.SetDiskID(disk, node)
8372 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8373 result.Raise("Grow request failed to node %s" % node)
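# Note: the grow RPC is sent to every node holding the disk (primary
# and secondaries) so that mirrored templates such as DRBD are resized
# on both sides of the mirror.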
8375 # TODO: Rewrite code to work properly
8376 # DRBD goes into sync mode for a short amount of time after executing the
8377 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8378 # calling "resize" in sync mode fails. Sleeping for a short amount of
8379 # time is a work-around.
8382 disk.RecordGrow(self.op.amount)
8383 self.cfg.Update(instance, feedback_fn)
8384 if self.op.wait_for_sync:
8385 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8387 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8388 " status.\nPlease check the instance.")
8389 if not instance.admin_up:
8390 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8391 elif not instance.admin_up:
9392 self.proc.LogWarning("Not shutting down the disk even though the instance"
9393 " is not supposed to be running, because wait for"
9394 " sync mode was not requested.")
8397 class LUQueryInstanceData(NoHooksLU):
8398 """Query runtime instance data.
8402 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8403 ("static", False, _TBool),
8407 def ExpandNames(self):
8408 self.needed_locks = {}
8409 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8411 if self.op.instances:
8412 self.wanted_names = []
8413 for name in self.op.instances:
8414 full_name = _ExpandInstanceName(self.cfg, name)
8415 self.wanted_names.append(full_name)
8416 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8418 self.wanted_names = None
8419 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8421 self.needed_locks[locking.LEVEL_NODE] = []
8422 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8424 def DeclareLocks(self, level):
8425 if level == locking.LEVEL_NODE:
8426 self._LockInstancesNodes()
8428 def CheckPrereq(self):
8429 """Check prerequisites.
8431 This only checks the optional instance list against the existing names.
8434 if self.wanted_names is None:
8435 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8437 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8438 in self.wanted_names]
8440 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8441 """Returns the status of a block device
8444 if self.op.static or not node:
8447 self.cfg.SetDiskID(dev, node)
8449 result = self.rpc.call_blockdev_find(node, dev)
8453 result.Raise("Can't compute disk status for %s" % instance_name)
8455 status = result.payload
8459 return (status.dev_path, status.major, status.minor,
8460 status.sync_percent, status.estimated_time,
8461 status.is_degraded, status.ldisk_status)
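# Illustrative return value of _ComputeBlockdevStatus (numbers
# hypothetical): ("/dev/drbd0", 147, 0, 90.5, 120, False, None), i.e.
# device path, major, minor, sync percent, estimated time, degraded
# flag and local-disk status.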
8463 def _ComputeDiskStatus(self, instance, snode, dev):
8464 """Compute block device status.
8467 if dev.dev_type in constants.LDS_DRBD:
8468 # we change the snode then (otherwise we use the one passed in)
8469 if dev.logical_id[0] == instance.primary_node:
8470 snode = dev.logical_id[1]
8472 snode = dev.logical_id[0]
8474 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8476 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8479 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8480 for child in dev.children]
8485 "iv_name": dev.iv_name,
8486 "dev_type": dev.dev_type,
8487 "logical_id": dev.logical_id,
8488 "physical_id": dev.physical_id,
8489 "pstatus": dev_pstatus,
8490 "sstatus": dev_sstatus,
8491 "children": dev_children,
8498 def Exec(self, feedback_fn):
8499 """Gather and return data"""
8502 cluster = self.cfg.GetClusterInfo()
8504 for instance in self.wanted_instances:
8505 if not self.op.static:
8506 remote_info = self.rpc.call_instance_info(instance.primary_node,
8508 instance.hypervisor)
8509 remote_info.Raise("Error checking node %s" % instance.primary_node)
8510 remote_info = remote_info.payload
8511 if remote_info and "state" in remote_info:
8514 remote_state = "down"
8517 if instance.admin_up:
8520 config_state = "down"
8522 disks = [self._ComputeDiskStatus(instance, None, device)
8523 for device in instance.disks]
8526 "name": instance.name,
8527 "config_state": config_state,
8528 "run_state": remote_state,
8529 "pnode": instance.primary_node,
8530 "snodes": instance.secondary_nodes,
8532 # this happens to be the same format used for hooks
8533 "nics": _NICListToTuple(self, instance.nics),
8534 "disk_template": instance.disk_template,
8536 "hypervisor": instance.hypervisor,
8537 "network_port": instance.network_port,
8538 "hv_instance": instance.hvparams,
8539 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8540 "be_instance": instance.beparams,
8541 "be_actual": cluster.FillBE(instance),
8542 "os_instance": instance.osparams,
8543 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8544 "serial_no": instance.serial_no,
8545 "mtime": instance.mtime,
8546 "ctime": instance.ctime,
8547 "uuid": instance.uuid,
8550 result[instance.name] = idict
8555 class LUSetInstanceParams(LogicalUnit):
8556 """Modifies an instance's parameters.
8559 HPATH = "instance-modify"
8560 HTYPE = constants.HTYPE_INSTANCE
8563 ("nics", _EmptyList, _TList),
8564 ("disks", _EmptyList, _TList),
8565 ("beparams", _EmptyDict, _TDict),
8566 ("hvparams", _EmptyDict, _TDict),
8567 ("disk_template", None, _TMaybeString),
8568 ("remote_node", None, _TMaybeString),
8569 ("os_name", None, _TMaybeString),
8570 ("force_variant", False, _TBool),
8571 ("osparams", None, _TOr(_TDict, _TNone)),
8576 def CheckArguments(self):
8577 if not (self.op.nics or self.op.disks or self.op.disk_template or
8578 self.op.hvparams or self.op.beparams or self.op.os_name):
8579 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8581 if self.op.hvparams:
8582 _CheckGlobalHvParams(self.op.hvparams)
8586 for disk_op, disk_dict in self.op.disks:
8587 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8588 if disk_op == constants.DDM_REMOVE:
8591 elif disk_op == constants.DDM_ADD:
8594 if not isinstance(disk_op, int):
8595 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8596 if not isinstance(disk_dict, dict):
8597 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8598 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8600 if disk_op == constants.DDM_ADD:
8601 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8602 if mode not in constants.DISK_ACCESS_SET:
8603 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8605 size = disk_dict.get('size', None)
8607 raise errors.OpPrereqError("Required disk parameter size missing",
8611 except (TypeError, ValueError), err:
8612 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8613 str(err), errors.ECODE_INVAL)
8614 disk_dict['size'] = size
8616 # modification of disk
8617 if 'size' in disk_dict:
8618 raise errors.OpPrereqError("Disk size change not possible, use"
8619 " grow-disk", errors.ECODE_INVAL)
8621 if disk_addremove > 1:
8622 raise errors.OpPrereqError("Only one disk add or remove operation"
8623 " supported at a time", errors.ECODE_INVAL)
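# Illustrative op.disks values (sizes hypothetical):
#   [(constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})]
#   [(constants.DDM_REMOVE, {})]
#   [(0, {"mode": "ro"})]    # modify the mode of disk 0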
8625 if self.op.disks and self.op.disk_template is not None:
8626 raise errors.OpPrereqError("Disk template conversion and other disk"
8627 " changes not supported at the same time",
8630 if self.op.disk_template:
8631 _CheckDiskTemplate(self.op.disk_template)
8632 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8633 self.op.remote_node is None):
8634 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8635 " one requires specifying a secondary node",
8640 for nic_op, nic_dict in self.op.nics:
8641 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8642 if nic_op == constants.DDM_REMOVE:
8645 elif nic_op == constants.DDM_ADD:
8648 if not isinstance(nic_op, int):
8649 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8650 if not isinstance(nic_dict, dict):
8651 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8652 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8654 # nic_dict should be a dict
8655 nic_ip = nic_dict.get('ip', None)
8656 if nic_ip is not None:
8657 if nic_ip.lower() == constants.VALUE_NONE:
8658 nic_dict['ip'] = None
8660 if not netutils.IsValidIP4(nic_ip):
8661 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8664 nic_bridge = nic_dict.get('bridge', None)
8665 nic_link = nic_dict.get('link', None)
8666 if nic_bridge and nic_link:
8667 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8668 " at the same time", errors.ECODE_INVAL)
8669 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8670 nic_dict['bridge'] = None
8671 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8672 nic_dict['link'] = None
8674 if nic_op == constants.DDM_ADD:
8675 nic_mac = nic_dict.get('mac', None)
8677 nic_dict['mac'] = constants.VALUE_AUTO
8679 if 'mac' in nic_dict:
8680 nic_mac = nic_dict['mac']
8681 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8682 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8684 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8685 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8686 " modifying an existing nic",
8689 if nic_addremove > 1:
8690 raise errors.OpPrereqError("Only one NIC add or remove operation"
8691 " supported at a time", errors.ECODE_INVAL)
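# Illustrative op.nics values (addresses hypothetical):
#   [(constants.DDM_ADD, {"mac": constants.VALUE_AUTO, "ip": "192.0.2.10"})]
#   [(0, {"link": "br0"})]   # modify NIC 0
#   [(constants.DDM_REMOVE, {})]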
8693 def ExpandNames(self):
8694 self._ExpandAndLockInstance()
8695 self.needed_locks[locking.LEVEL_NODE] = []
8696 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8698 def DeclareLocks(self, level):
8699 if level == locking.LEVEL_NODE:
8700 self._LockInstancesNodes()
8701 if self.op.disk_template and self.op.remote_node:
8702 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8703 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8705 def BuildHooksEnv(self):
8708 This runs on the master, primary and secondaries.
8712 if constants.BE_MEMORY in self.be_new:
8713 args['memory'] = self.be_new[constants.BE_MEMORY]
8714 if constants.BE_VCPUS in self.be_new:
8715 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8716 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8717 # information at all.
8720 nic_override = dict(self.op.nics)
8721 for idx, nic in enumerate(self.instance.nics):
8722 if idx in nic_override:
8723 this_nic_override = nic_override[idx]
8725 this_nic_override = {}
8726 if 'ip' in this_nic_override:
8727 ip = this_nic_override['ip']
8730 if 'mac' in this_nic_override:
8731 mac = this_nic_override['mac']
8734 if idx in self.nic_pnew:
8735 nicparams = self.nic_pnew[idx]
8737 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8738 mode = nicparams[constants.NIC_MODE]
8739 link = nicparams[constants.NIC_LINK]
8740 args['nics'].append((ip, mac, mode, link))
8741 if constants.DDM_ADD in nic_override:
8742 ip = nic_override[constants.DDM_ADD].get('ip', None)
8743 mac = nic_override[constants.DDM_ADD]['mac']
8744 nicparams = self.nic_pnew[constants.DDM_ADD]
8745 mode = nicparams[constants.NIC_MODE]
8746 link = nicparams[constants.NIC_LINK]
8747 args['nics'].append((ip, mac, mode, link))
8748 elif constants.DDM_REMOVE in nic_override:
8749 del args['nics'][-1]
8751 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8752 if self.op.disk_template:
8753 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8754 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8757 def CheckPrereq(self):
8758 """Check prerequisites.
8760 This only checks the instance list against the existing names.
8763 # checking the new params on the primary/secondary nodes
8765 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8766 cluster = self.cluster = self.cfg.GetClusterInfo()
8767 assert self.instance is not None, \
8768 "Cannot retrieve locked instance %s" % self.op.instance_name
8769 pnode = instance.primary_node
8770 nodelist = list(instance.all_nodes)
8773 if self.op.os_name and not self.op.force:
8774 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8775 self.op.force_variant)
8776 instance_os = self.op.os_name
8778 instance_os = instance.os
8780 if self.op.disk_template:
8781 if instance.disk_template == self.op.disk_template:
8782 raise errors.OpPrereqError("Instance already has disk template %s" %
8783 instance.disk_template, errors.ECODE_INVAL)
8785 if (instance.disk_template,
8786 self.op.disk_template) not in self._DISK_CONVERSIONS:
8787 raise errors.OpPrereqError("Unsupported disk template conversion from"
8788 " %s to %s" % (instance.disk_template,
8789 self.op.disk_template),
8791 _CheckInstanceDown(self, instance, "cannot change disk template")
8792 if self.op.disk_template in constants.DTS_NET_MIRROR:
8793 if self.op.remote_node == pnode:
8794 raise errors.OpPrereqError("Given new secondary node %s is the same"
8795 " as the primary node of the instance" %
8796 self.op.remote_node, errors.ECODE_STATE)
8797 _CheckNodeOnline(self, self.op.remote_node)
8798 _CheckNodeNotDrained(self, self.op.remote_node)
8799 disks = [{"size": d.size} for d in instance.disks]
8800 required = _ComputeDiskSize(self.op.disk_template, disks)
8801 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8803 # hvparams processing
8804 if self.op.hvparams:
8805 hv_type = instance.hypervisor
8806 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8807 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8808 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8811 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8812 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8813 self.hv_new = hv_new # the new actual values
8814 self.hv_inst = i_hvdict # the new dict (without defaults)
8816 self.hv_new = self.hv_inst = {}
8818 # beparams processing
8819 if self.op.beparams:
8820 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8822 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8823 be_new = cluster.SimpleFillBE(i_bedict)
8824 self.be_new = be_new # the new actual values
8825 self.be_inst = i_bedict # the new dict (without defaults)
8827 self.be_new = self.be_inst = {}
8829 # osparams processing
8830 if self.op.osparams:
8831 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8832 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8833 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8834 self.os_inst = i_osdict # the new dict (without defaults)
8836 self.os_new = self.os_inst = {}
8840 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8841 mem_check_list = [pnode]
8842 if be_new[constants.BE_AUTO_BALANCE]:
8843 # either we changed auto_balance to yes or it was from before
8844 mem_check_list.extend(instance.secondary_nodes)
8845 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8846 instance.hypervisor)
8847 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8848 instance.hypervisor)
8849 pninfo = nodeinfo[pnode]
8850 msg = pninfo.fail_msg
8852 # Assume the primary node is unreachable and go ahead
8853 self.warn.append("Can't get info from primary node %s: %s" %
8855 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8856 self.warn.append("Node data from primary node %s doesn't contain"
8857 " free memory information" % pnode)
8858 elif instance_info.fail_msg:
8859 self.warn.append("Can't get instance runtime information: %s" %
8860 instance_info.fail_msg)
8862 if instance_info.payload:
8863 current_mem = int(instance_info.payload['memory'])
8865 # Assume instance not running
8866 # (there is a slight race condition here, but it's not very probable,
8867 # and we have no other way to check)
8869 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8870 pninfo.payload['memory_free'])
8872 raise errors.OpPrereqError("This change will prevent the instance"
8873 " from starting, due to %d MB of memory"
8874 " missing on its primary node" % miss_mem,
8877 if be_new[constants.BE_AUTO_BALANCE]:
8878 for node, nres in nodeinfo.items():
8879 if node not in instance.secondary_nodes:
8883 self.warn.append("Can't get info from secondary node %s: %s" %
8885 elif not isinstance(nres.payload.get('memory_free', None), int):
8886 self.warn.append("Secondary node %s didn't return free"
8887 " memory information" % node)
8888 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8889 self.warn.append("Not enough memory to failover instance to"
8890 " secondary node %s" % node)
8895 for nic_op, nic_dict in self.op.nics:
8896 if nic_op == constants.DDM_REMOVE:
8897 if not instance.nics:
8898 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8901 if nic_op != constants.DDM_ADD:
8903 if not instance.nics:
8904 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8905 " no NICs" % nic_op,
8907 if nic_op < 0 or nic_op >= len(instance.nics):
8908 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8910 (nic_op, len(instance.nics) - 1),
8912 old_nic_params = instance.nics[nic_op].nicparams
8913 old_nic_ip = instance.nics[nic_op].ip
8918 update_params_dict = dict([(key, nic_dict[key])
8919 for key in constants.NICS_PARAMETERS
8920 if key in nic_dict])
8922 if 'bridge' in nic_dict:
8923 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8925 new_nic_params = _GetUpdatedParams(old_nic_params,
8927 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8928 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8929 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8930 self.nic_pinst[nic_op] = new_nic_params
8931 self.nic_pnew[nic_op] = new_filled_nic_params
8932 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8934 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8935 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8936 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8938 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8940 self.warn.append(msg)
8942 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8943 if new_nic_mode == constants.NIC_MODE_ROUTED:
8944 if 'ip' in nic_dict:
8945 nic_ip = nic_dict['ip']
8949 raise errors.OpPrereqError('Cannot set the nic ip to None'
8950 ' on a routed nic', errors.ECODE_INVAL)
8951 if 'mac' in nic_dict:
8952 nic_mac = nic_dict['mac']
8954 raise errors.OpPrereqError('Cannot set the nic mac to None',
8956 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8957 # otherwise generate the mac
8958 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8960 # or validate/reserve the current one
8962 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8963 except errors.ReservationError:
8964 raise errors.OpPrereqError("MAC address %s already in use"
8965 " in cluster" % nic_mac,
8966 errors.ECODE_NOTUNIQUE)
8969 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8970 raise errors.OpPrereqError("Disk operations not supported for"
8971 " diskless instances",
8973 for disk_op, _ in self.op.disks:
8974 if disk_op == constants.DDM_REMOVE:
8975 if len(instance.disks) == 1:
8976 raise errors.OpPrereqError("Cannot remove the last disk of"
8977 " an instance", errors.ECODE_INVAL)
8978 _CheckInstanceDown(self, instance, "cannot remove disks")
8980 if (disk_op == constants.DDM_ADD and
8981 len(instance.disks) >= constants.MAX_DISKS):
8982 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8983 " add more" % constants.MAX_DISKS,
8985 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8987 if disk_op < 0 or disk_op >= len(instance.disks):
8988 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8990 (disk_op, len(instance.disks)),
8995 def _ConvertPlainToDrbd(self, feedback_fn):
8996 """Converts an instance from plain to drbd.
8999 feedback_fn("Converting template to drbd")
9000 instance = self.instance
9001 pnode = instance.primary_node
9002 snode = self.op.remote_node
9004 # create a fake disk info for _GenerateDiskTemplate
9005 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9006 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9007 instance.name, pnode, [snode],
9008 disk_info, None, None, 0)
9009 info = _GetInstanceInfoText(instance)
9010 feedback_fn("Creating additional volumes...")
9011 # first, create the missing data and meta devices
9012 for disk in new_disks:
9013 # unfortunately this is... not too nice
9014 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9016 for child in disk.children:
9017 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9018 # at this stage, all new LVs have been created, we can rename the old ones
9020 feedback_fn("Renaming original volumes...")
9021 rename_list = [(o, n.children[0].logical_id)
9022 for (o, n) in zip(instance.disks, new_disks)]
9023 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9024 result.Raise("Failed to rename original LVs")
9026 feedback_fn("Initializing DRBD devices...")
9027 # all child devices are in place, we can now create the DRBD devices
9028 for disk in new_disks:
9029 for node in [pnode, snode]:
9030 f_create = node == pnode
9031 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9033 # at this point, the instance has been modified
9034 instance.disk_template = constants.DT_DRBD8
9035 instance.disks = new_disks
9036 self.cfg.Update(instance, feedback_fn)
9038 # disks are created, waiting for sync
9039 disk_abort = not _WaitForSync(self, instance)
9041 raise errors.OpExecError("There are some degraded disks for"
9042 " this instance, please cleanup manually")
9044 def _ConvertDrbdToPlain(self, feedback_fn):
9045 """Converts an instance from drbd to plain.
9048 instance = self.instance
9049 assert len(instance.secondary_nodes) == 1
9050 pnode = instance.primary_node
9051 snode = instance.secondary_nodes[0]
9052 feedback_fn("Converting template to plain")
9054 old_disks = instance.disks
9055 new_disks = [d.children[0] for d in old_disks]
9057 # copy over size and mode
9058 for parent, child in zip(old_disks, new_disks):
9059 child.size = parent.size
9060 child.mode = parent.mode
9062 # update instance structure
9063 instance.disks = new_disks
9064 instance.disk_template = constants.DT_PLAIN
9065 self.cfg.Update(instance, feedback_fn)
9067 feedback_fn("Removing volumes on the secondary node...")
9068 for disk in old_disks:
9069 self.cfg.SetDiskID(disk, snode)
9070 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9072 self.LogWarning("Could not remove block device %s on node %s,"
9073 " continuing anyway: %s", disk.iv_name, snode, msg)
9075 feedback_fn("Removing unneeded volumes on the primary node...")
9076 for idx, disk in enumerate(old_disks):
9077 meta = disk.children[1]
9078 self.cfg.SetDiskID(meta, pnode)
9079 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9081 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9082 " continuing anyway: %s", idx, pnode, msg)
9085 def Exec(self, feedback_fn):
9086 """Modifies an instance.
9088 All parameters take effect only at the next restart of the instance.
9091 # Process here the warnings from CheckPrereq, as we don't have a
9092 # feedback_fn there.
9093 for warn in self.warn:
9094 feedback_fn("WARNING: %s" % warn)
9097 instance = self.instance
9099 for disk_op, disk_dict in self.op.disks:
9100 if disk_op == constants.DDM_REMOVE:
9101 # remove the last disk
9102 device = instance.disks.pop()
9103 device_idx = len(instance.disks)
9104 for node, disk in device.ComputeNodeTree(instance.primary_node):
9105 self.cfg.SetDiskID(disk, node)
9106 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9108 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9109 " continuing anyway", device_idx, node, msg)
9110 result.append(("disk/%d" % device_idx, "remove"))
9111 elif disk_op == constants.DDM_ADD:
9113 if instance.disk_template == constants.DT_FILE:
9114 file_driver, file_path = instance.disks[0].logical_id
9115 file_path = os.path.dirname(file_path)
9117 file_driver = file_path = None
9118 disk_idx_base = len(instance.disks)
9119 new_disk = _GenerateDiskTemplate(self,
9120 instance.disk_template,
9121 instance.name, instance.primary_node,
9122 instance.secondary_nodes,
9127 instance.disks.append(new_disk)
9128 info = _GetInstanceInfoText(instance)
9130 logging.info("Creating volume %s for instance %s",
9131 new_disk.iv_name, instance.name)
9132 # Note: this needs to be kept in sync with _CreateDisks
9134 for node in instance.all_nodes:
9135 f_create = node == instance.primary_node
9137 _CreateBlockDev(self, node, instance, new_disk,
9138 f_create, info, f_create)
9139 except errors.OpExecError, err:
9140 self.LogWarning("Failed to create volume %s (%s) on"
9142 new_disk.iv_name, new_disk, node, err)
9143 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9144 (new_disk.size, new_disk.mode)))
9146 # change a given disk
9147 instance.disks[disk_op].mode = disk_dict['mode']
9148 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9150 if self.op.disk_template:
9151 r_shut = _ShutdownInstanceDisks(self, instance)
9153 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9154 " proceed with disk template conversion")
9155 mode = (instance.disk_template, self.op.disk_template)
9157 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9159 self.cfg.ReleaseDRBDMinors(instance.name)
9161 result.append(("disk_template", self.op.disk_template))
9164 for nic_op, nic_dict in self.op.nics:
9165 if nic_op == constants.DDM_REMOVE:
9166 # remove the last nic
9167 del instance.nics[-1]
9168 result.append(("nic.%d" % len(instance.nics), "remove"))
9169 elif nic_op == constants.DDM_ADD:
9170 # mac and bridge should be set by now
9171 mac = nic_dict['mac']
9172 ip = nic_dict.get('ip', None)
9173 nicparams = self.nic_pinst[constants.DDM_ADD]
9174 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9175 instance.nics.append(new_nic)
9176 result.append(("nic.%d" % (len(instance.nics) - 1),
9177 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9178 (new_nic.mac, new_nic.ip,
9179 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9180 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9183 for key in 'mac', 'ip':
9185 setattr(instance.nics[nic_op], key, nic_dict[key])
9186 if nic_op in self.nic_pinst:
9187 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9188 for key, val in nic_dict.iteritems():
9189 result.append(("nic.%s/%d" % (key, nic_op), val))
9192 if self.op.hvparams:
9193 instance.hvparams = self.hv_inst
9194 for key, val in self.op.hvparams.iteritems():
9195 result.append(("hv/%s" % key, val))
9198 if self.op.beparams:
9199 instance.beparams = self.be_inst
9200 for key, val in self.op.beparams.iteritems():
9201 result.append(("be/%s" % key, val))
9205 instance.os = self.op.os_name
9208 if self.op.osparams:
9209 instance.osparams = self.os_inst
9210 for key, val in self.op.osparams.iteritems():
9211 result.append(("os/%s" % key, val))
9213 self.cfg.Update(instance, feedback_fn)
9217 _DISK_CONVERSIONS = {
9218 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9219 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9223 class LUQueryExports(NoHooksLU):
9224 """Query the exports list
9228 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9229 ("use_locking", False, _TBool),
9233 def ExpandNames(self):
9234 self.needed_locks = {}
9235 self.share_locks[locking.LEVEL_NODE] = 1
9236 if not self.op.nodes:
9237 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9239 self.needed_locks[locking.LEVEL_NODE] = \
9240 _GetWantedNodes(self, self.op.nodes)
9242 def Exec(self, feedback_fn):
9243 """Compute the list of all the exported system images.
9246 @return: a dictionary with the structure node->(export-list)
9247 where export-list is a list of the instances exported on that node.
9251 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9252 rpcresult = self.rpc.call_export_list(self.nodes)
9254 for node in rpcresult:
9255 if rpcresult[node].fail_msg:
9256 result[node] = False
9258 result[node] = rpcresult[node].payload
9263 class LUPrepareExport(NoHooksLU):
9264 """Prepares an instance for an export and returns useful information.
9269 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9273 def ExpandNames(self):
9274 self._ExpandAndLockInstance()
9276 def CheckPrereq(self):
9277 """Check prerequisites.
9280 instance_name = self.op.instance_name
9282 self.instance = self.cfg.GetInstanceInfo(instance_name)
9283 assert self.instance is not None, \
9284 "Cannot retrieve locked instance %s" % self.op.instance_name
9285 _CheckNodeOnline(self, self.instance.primary_node)
9287 self._cds = _GetClusterDomainSecret()
9289 def Exec(self, feedback_fn):
9290 """Prepares an instance for an export.
9293 instance = self.instance
9295 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9296 salt = utils.GenerateSecret(8)
9298 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9299 result = self.rpc.call_x509_cert_create(instance.primary_node,
9300 constants.RIE_CERT_VALIDITY)
9301 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9303 (name, cert_pem) = result.payload
9305 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9309 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9310 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9312 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9318 class LUExportInstance(LogicalUnit):
9319 """Export an instance to an image in the cluster.
9322 HPATH = "instance-export"
9323 HTYPE = constants.HTYPE_INSTANCE
9326 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9327 ("shutdown", True, _TBool),
9329 ("remove_instance", False, _TBool),
9330 ("ignore_remove_failures", False, _TBool),
9331 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9332 ("x509_key_name", None, _TOr(_TList, _TNone)),
9333 ("destination_x509_ca", None, _TMaybeString),
9337 def CheckArguments(self):
9338 """Check the arguments.
9341 self.x509_key_name = self.op.x509_key_name
9342 self.dest_x509_ca_pem = self.op.destination_x509_ca
9344 if self.op.remove_instance and not self.op.shutdown:
9345 raise errors.OpPrereqError("Cannot remove instance without shutting it"
9348 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9349 if not self.x509_key_name:
9350 raise errors.OpPrereqError("Missing X509 key name for encryption",
9353 if not self.dest_x509_ca_pem:
9354 raise errors.OpPrereqError("Missing destination X509 CA",
9357 def ExpandNames(self):
9358 self._ExpandAndLockInstance()
9360 # Lock all nodes for local exports
9361 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9362 # FIXME: lock only instance primary and destination node
9364 # Sad but true, for now we have to lock all nodes, as we don't know where
9365 # the previous export might be, and in this LU we search for it and
9366 # remove it from its current node. In the future we could fix this by:
9367 # - making a tasklet to search (share-lock all), then create the
9368 # new one, then one to remove, after
9369 # - removing the removal operation altogether
9370 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9372 def DeclareLocks(self, level):
9373 """Last minute lock declaration."""
9374 # All nodes are locked anyway, so nothing to do here.
9376 def BuildHooksEnv(self):
9379 This will run on the master, primary node and target node.
9383 "EXPORT_MODE": self.op.mode,
9384 "EXPORT_NODE": self.op.target_node,
9385 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9386 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9387 # TODO: Generic function for boolean env variables
9388 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9391 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9393 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9395 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9396 nl.append(self.op.target_node)
9400 def CheckPrereq(self):
9401 """Check prerequisites.
9403 This checks that the instance and node names are valid.
9406 instance_name = self.op.instance_name
9408 self.instance = self.cfg.GetInstanceInfo(instance_name)
9409 assert self.instance is not None, \
9410 "Cannot retrieve locked instance %s" % self.op.instance_name
9411 _CheckNodeOnline(self, self.instance.primary_node)
9413 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9414 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9415 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9416 assert self.dst_node is not None
9418 _CheckNodeOnline(self, self.dst_node.name)
9419 _CheckNodeNotDrained(self, self.dst_node.name)
9422 self.dest_disk_info = None
9423 self.dest_x509_ca = None
9425 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9426 self.dst_node = None
9428 if len(self.op.target_node) != len(self.instance.disks):
9429 raise errors.OpPrereqError(("Received destination information for %s"
9430 " disks, but instance %s has %s disks") %
9431 (len(self.op.target_node), instance_name,
9432 len(self.instance.disks)),
9435 cds = _GetClusterDomainSecret()
9437 # Check X509 key name
9439 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9440 except (TypeError, ValueError), err:
9441 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9443 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9444 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9447 # Load and verify CA
9449 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9450 except OpenSSL.crypto.Error, err:
9451 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9452 (err, ), errors.ECODE_INVAL)
9454 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9455 if errcode is not None:
9456 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9457 (msg, ), errors.ECODE_INVAL)
9459 self.dest_x509_ca = cert
      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info
    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)
  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))
    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
        "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))
    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
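  # Note on the return value (illustration only, not from the original
  # source): for a two-disk instance a fully successful export yields
  # (True, [True, True]); callers can inspect the per-disk booleans to
  # report partial failures.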
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    ("instance_name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but
    # we don't need to lock the instance itself, as nothing will happen to it
    # (and we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed
    # in. This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
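# Example (illustrative, not part of the module): with pattern "^web" and a
# cluster where instance "web1" carries the tag "webfarm", Exec would return
# [("/instances/web1", "webfarm")] -- one (path, tag) pair for every matching
# tag across the cluster, node and instance tag sets.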
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
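# Quick illustration of the CheckPrereq subset logic above (not part of the
# original code): with cur_tags == set(["a", "b"]) and
# del_tags == frozenset(["b", "c"]), "del_tags <= cur_tags" is False and
# "del_tags - cur_tags" is frozenset(["c"]), so the LU refuses the request
# with "Tag(s) 'c' not found" before modifying anything.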
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
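# Illustrative client-side usage (an assumption, not part of this module):
# the LU is driven through its opcode, roughly
#
#   op = opcodes.OpTestDelay(duration=2.0, on_master=True,
#                            on_nodes=[], repeat=3)
#
# which sleeps three times for two seconds on the master only.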
class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, _TBool),
    ("notify_exec", False, _TBool),
    ("log_messages", _EmptyList, _TListOf(_TString)),
    ("fail", False, _TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")
      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)
        # Send details to client
        cb(tmpsock)
        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
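  # Sketch of the client side of this handshake (illustrative only; the
  # real client lives outside this module): after learning the socket path
  # from the job log, a client would do roughly
  #
  #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   sock.connect(sockname)  # unblocks sock.accept() above
  #   ...                     # observe the notified test step
  #   sock.close()            # unblocks conn.recv(1) above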
  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)
  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0
  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }
  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    self.mode = mode
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.name = None
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
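  # Construction sketch (illustrative, not from the original source): an
  # allocation request must pass every _ALLO_KEYS member as a keyword
  # argument, e.g.
  #
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com", mem_size=512, disks=[...],
  #                    disk_template=constants.DT_DRBD8, os="debian-image",
  #                    tags=[], nics=[...], vcpus=1, hypervisor=None)
  #
  # Extra or missing keywords raise ProgrammerError, and the serialized
  # input text is built immediately by _BuildInputData.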
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results
    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
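  # Rough shape of the structure built by _ComputeClusterData above
  # (illustration only, abridged):
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "...",
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodes": {"node1": {"total_memory": ..., "free_disk": ..., ...}},
  #     "instances": {"inst1": {"vcpus": ..., "disks": [...], ...}},
  #   }
  #
  # The "request" key is added later, in _BuildInputData.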
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request
  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
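  # For illustration (inferred from the checks above, abridged): a valid
  # allocator reply deserializes to a dict like
  #
  #   {"success": True, "info": "allocation successful",
  #    "result": ["node2.example.com"]}
  #
  # "result" must be a list; replies from older allocators that use the
  # "nodes" key are rewritten to "result" for backwards compatibility.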
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result