4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 import ganeti.masterd.instance # pylint: disable-msg=W0611
# Modifiable default values; need to define these here before the
# actual LUs
64 """Returns an empty list.
71 """Returns an empty dict.
77 #: The without-default default value
#: The no-type (value too complex to check in the type system)
87 """Checks if the given value is not None.
90 return val is not None
94 """Checks if the given value is None.
101 """Checks if the given value is a boolean.
104 return isinstance(val, bool)
108 """Checks if the given value is an integer.
111 return isinstance(val, int)
115 """Checks if the given value is a float.
118 return isinstance(val, float)
122 """Checks if the given value is a string.
125 return isinstance(val, basestring)
129 """Checks if a given value evaluates to a boolean True value.
135 def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list.
139 return lambda val: val in target_list
144 """Checks if the given value is a list.
147 return isinstance(val, list)
151 """Checks if the given value is a dictionary.
154 return isinstance(val, dict)
157 def _TIsLength(size):
158 """Check is the given container is of the given size.
161 return lambda container: len(container) == size
166 """Combine multiple functions using an AND operation.
170 return compat.all(t(val) for t in args)
175 """Combine multiple functions using an AND operation.
179 return compat.any(t(val) for t in args)
184 """Checks that a modified version of the argument passes the given test.
187 return lambda val: test(fn(val))
192 #: a non-empty string
193 _TNonEmptyString = _TAnd(_TString, _TTrue)
196 #: a maybe non-empty string
197 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
200 #: a maybe boolean (bool or none)
201 _TMaybeBool = _TOr(_TBool, _TNone)
#: a positive integer (zero allowed, i.e. non-negative)
205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
207 #: a strictly positive integer
208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
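# Illustrative sketch (not part of the original module): the primitive
# checks above compose into richer validators; the name below is
# hypothetical.
#
#   _TMaybePositiveInt = _TOr(_TNone, _TPositiveInt)
#   assert _TMaybePositiveInt(None) and _TMaybePositiveInt(3)
#   assert not _TMaybePositiveInt(-1)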
211 def _TListOf(my_type):
212 """Checks if a given value is a list with all elements of the same type.
return _TAnd(_TList,
             lambda lst: compat.all(my_type(v) for v in lst))
219 def _TDictOf(key_type, val_type):
220 """Checks a dict type for the type of its key/values.
return _TAnd(_TDict,
             lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225 and compat.all(val_type(v)
226 for v in my_dict.values())))
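# Example (hedged sketch): a hostname-to-port mapping could be validated
# by composing the two helpers above; the name below is hypothetical.
#
#   _TPortMap = _TDictOf(_TNonEmptyString, _TStrictPositiveInt)
#   assert _TPortMap({"node1.example.com": 1811})
#   assert not _TPortMap({"": 1811})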
229 # Common opcode attributes
231 #: output fields for a query operation
232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
235 #: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     _TPositiveInt)
239 #: the force parameter
240 _PForce = ("force", False, _TBool)
242 #: a required instance name (for single-instance LUs)
243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
246 #: a required node name (for single-node LUs)
247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
249 #: the migration type (live/non-live)
250 _PMigrationMode = ("mode", None, _TOr(_TNone,
251 _TElemOf(constants.HT_MIGRATION_MODES)))
253 #: the obsolete 'live' mode (boolean)
254 _PMigrationLive = ("live", None, _TMaybeBool)
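# Each description above is a (name, default value, check) triple, in the
# format consumed by LogicalUnit.__init__ below. A hypothetical LU could
# therefore declare:
#
#   _OP_PARAMS = [
#     _PInstanceName,
#     _PForce,
#     ("ignore_failures", False, _TBool),
#     ]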
258 class LogicalUnit(object):
259 """Logical Unit base class.
261 Subclasses must follow these rules:
262 - implement ExpandNames
263 - implement CheckPrereq (except when tasklets are used)
264 - implement Exec (except when tasklets are used)
265 - implement BuildHooksEnv
266 - redefine HPATH and HTYPE
267 - optionally redefine their run requirements:
268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
270 Note that all commands require root permissions.
272 @ivar dry_run_result: the value (if any) that will be returned to the caller
273 in dry-run mode (signalled by opcode dry_run parameter)
@cvar _OP_PARAMS: a list of opcode attributes, their default values
    they should get if not already defined, and types they must match
283 def __init__(self, processor, op, context, rpc):
284 """Constructor for LogicalUnit.
This needs to be overridden in derived classes in order to check op
validity.

"""
290 self.proc = processor
self.op = op
self.cfg = context.cfg
293 self.context = context
295 # Dicts used to declare locking needs to mcpu
296 self.needed_locks = None
297 self.acquired_locks = {}
298 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
300 self.remove_locks = {}
301 # Used to force good behavior when calling helper functions
self.recalculate_locks = {}
self.__ssh = None
# logging
305 self.Log = processor.Log # pylint: disable-msg=C0103
306 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
307 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
308 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
309 # support for dry-run
310 self.dry_run_result = None
311 # support for generic debug attribute
312 if (not hasattr(self.op, "debug_level") or
313 not isinstance(self.op.debug_level, int)):
self.op.debug_level = 0

# Tasklets
self.tasklets = None
319 # The new kind-of-type-system
320 op_id = self.op.OP_ID
321 for attr_name, aval, test in self._OP_PARAMS:
322 if not hasattr(op, attr_name):
323 if aval == _NoDefault:
324 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
325 (op_id, attr_name), errors.ECODE_INVAL)
else:
  if callable(aval):
    dval = aval()
  else:
    dval = aval
  setattr(self.op, attr_name, dval)
332 attr_val = getattr(op, attr_name)
336 if not callable(test):
337 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
338 " given type is not a proper type (%s)" %
339 (op_id, attr_name, test))
340 if not test(attr_val):
341 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
342 self.op.OP_ID, attr_name, type(attr_val), attr_val)
343 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
344 (op_id, attr_name), errors.ECODE_INVAL)
346 self.CheckArguments()
349 """Returns the SshRunner object
353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
356 ssh = property(fget=__GetSSH)
358 def CheckArguments(self):
359 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and to ensure the
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing it separately is better because:
- ExpandNames is left as purely a lock-related function
- CheckPrereq is run after we have acquired locks (and possibly
  waited for them)
The function is allowed to change the self.op attribute so that
later methods need no longer worry about missing parameters.

"""
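# A minimal sketch of an override (hypothetical LU and parameters):
#
#   def CheckArguments(self):
#     if self.op.force and not self.op.ignore_failures:
#       raise errors.OpPrereqError("force requires ignore_failures",
#                                  errors.ECODE_INVAL)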
376 def ExpandNames(self):
377 """Expand names for this LU.
379 This method is called before starting to execute the opcode, and it should
380 update all the parameters of the opcode to their canonical form (e.g. a
381 short node name must be fully expanded after this method has successfully
completed). This way locking, hooks, logging, etc. can work correctly.
384 LUs which implement this method must also populate the self.needed_locks
385 member, as a dict with lock levels as keys, and a list of needed lock names
388 - use an empty dict if you don't need any lock
389 - if you don't need any lock at a particular level omit that level
390 - don't put anything for the BGL level
391 - if you want all locks at a level use locking.ALL_SET as a value
393 If you need to share locks (rather than acquire them exclusively) at one
394 level you can modify self.share_locks, setting a true value (usually 1) for
395 that level. By default locks are not shared.
397 This function can also define a list of tasklets, which then will be
398 executed in order instead of the usual LU-level CheckPrereq and Exec
399 functions, if those are not defined by the LU.
403 # Acquire all nodes and one instance
404 self.needed_locks = {
405 locking.LEVEL_NODE: locking.ALL_SET,
locking.LEVEL_INSTANCE: ['instance1.example.com'],
}
408 # Acquire just two nodes
409 self.needed_locks = {
locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
}
413 self.needed_locks = {} # No, you can't leave it to the default value None
416 # The implementation of this method is mandatory only if the new LU is
# concurrent, so that old LUs don't need to be changed all at the same
# time.
420 self.needed_locks = {} # Exclusive LUs don't need locks.
422 raise NotImplementedError
424 def DeclareLocks(self, level):
425 """Declare LU locking needs for a level
427 While most LUs can just declare their locking needs at ExpandNames time,
428 sometimes there's the need to calculate some locks after having acquired
429 the ones before. This function is called just before acquiring locks at a
430 particular level, but after acquiring the ones at lower levels, and permits
431 such calculations. It can be used to modify self.needed_locks, and by
432 default it does nothing.
434 This function is only called if you have something already set in
435 self.needed_locks for the level.
437 @param level: Locking level which is going to be locked
@type level: member of ganeti.locking.LEVELS

"""
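# Sketch of a typical implementation, using the _LockInstancesNodes
# helper defined further down:
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()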
442 def CheckPrereq(self):
443 """Check prerequisites for this LU.
445 This method should check that the prerequisites for the execution
446 of this LU are fulfilled. It can do internode communication, but
it should be idempotent - no cluster or system changes are
allowed.
450 The method should raise errors.OpPrereqError in case something is
451 not fulfilled. Its return value is ignored.
453 This method should also update all the parameters of the opcode to
454 their canonical form if it hasn't been done by ExpandNames before.
457 if self.tasklets is not None:
458 for (idx, tl) in enumerate(self.tasklets):
459 logging.debug("Checking prerequisites for tasklet %s/%s",
idx + 1, len(self.tasklets))
tl.CheckPrereq()
465 def Exec(self, feedback_fn):
"""Execute the LU.

This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in
code, or expected.

"""
473 if self.tasklets is not None:
474 for (idx, tl) in enumerate(self.tasklets):
logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
tl.Exec(feedback_fn)
else:
  raise NotImplementedError
480 def BuildHooksEnv(self):
481 """Build hooks environment for this LU.
This method should return a three-element tuple consisting of: a dict
484 containing the environment that will be used for running the
485 specific hook for this LU, a list of node names on which the hook
486 should run before the execution, and a list of node names on which
487 the hook should run after the execution.
The keys of the dict must not be prefixed with 'GANETI_', as that
prefix is reserved for the hooks runner. Also note additional keys will be
491 added by the hooks runner. If the LU doesn't define any
492 environment, an empty dict (and not None) should be returned.
494 No nodes should be returned as an empty list (and not None).
Note that if the HPATH for a LU class is None, this function will
not be called.
500 raise NotImplementedError
502 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503 """Notify the LU about the results of its hooks.
505 This method is called every time a hooks phase is executed, and notifies
506 the Logical Unit about the hooks' result. The LU can then use it to alter
507 its result based on the hooks. By default the method does nothing and the
508 previous result is passed back unchanged but any LU can define it if it
509 wants to use the local cluster hook-scripts somehow.
511 @param phase: one of L{constants.HOOKS_PHASE_POST} or
512 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
513 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
515 @param lu_result: the previous Exec result this LU had, or None
517 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused argument and "could
# be a function" warnings
# pylint: disable-msg=W0613,R0201
return lu_result
526 def _ExpandAndLockInstance(self):
527 """Helper function to expand and lock an instance.
529 Many LUs that work on an instance take its name in self.op.instance_name
530 and need to expand it and then declare the expanded name for locking. This
531 function does it, and then updates self.op.instance_name to the expanded
532 name. It also initializes needed_locks as a dict, if this hasn't been done
536 if self.needed_locks is None:
537 self.needed_locks = {}
539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540 "_ExpandAndLockInstance called with instance-level locks set"
541 self.op.instance_name = _ExpandInstanceName(self.cfg,
542 self.op.instance_name)
543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
545 def _LockInstancesNodes(self, primary_only=False):
546 """Helper function to declare instances' nodes for locking.
548 This function should be called after locking one or more instances to lock
549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550 with all primary or secondary nodes for instances already locked and
551 present in self.needed_locks[locking.LEVEL_INSTANCE].
553 It should be called from DeclareLocks, and for safety only works if
554 self.recalculate_locks[locking.LEVEL_NODE] is set.
556 In the future it may grow parameters to just lock some instance's nodes, or
557 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
561 if level == locking.LEVEL_NODE:
562 self._LockInstancesNodes()
564 @type primary_only: boolean
565 @param primary_only: only lock primary nodes of locked instances
568 assert locking.LEVEL_NODE in self.recalculate_locks, \
569 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
573 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
574 # future we might want to have different behaviors depending on the value
575 # of self.recalculate_locks[locking.LEVEL_NODE]
wanted_nodes = []
for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578 instance = self.context.cfg.GetInstanceInfo(instance_name)
579 wanted_nodes.append(instance.primary_node)
if not primary_only:
  wanted_nodes.extend(instance.secondary_nodes)
583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
588 del self.recalculate_locks[locking.LEVEL_NODE]
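# Caller-side sketch (hypothetical LU): ExpandNames declares an empty
# node-lock list plus a recalculation mode, and DeclareLocks then invokes
# this helper as shown in its docstring above:
#
#   self.needed_locks[locking.LEVEL_NODE] = []
#   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE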
591 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592 """Simple LU which runs no hooks.
594 This LU is intended as a parent for other LogicalUnits which will
595 run no hooks, in order to reduce duplicate code.
601 def BuildHooksEnv(self):
602 """Empty BuildHooksEnv for NoHooksLu.
604 This just raises an error.
607 assert False, "BuildHooksEnv called for NoHooksLUs"
611 """Tasklet base class.
613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614 they can mix legacy code with tasklets. Locking needs to be done in the LU,
615 tasklets know nothing about locks.
617 Subclasses must follow these rules:
- Implement CheckPrereq
- Implement Exec
622 def __init__(self, lu):
629 def CheckPrereq(self):
630 """Check prerequisites for this tasklets.
632 This method should check whether the prerequisites for the execution of
633 this tasklet are fulfilled. It can do internode communication, but it
634 should be idempotent - no cluster or system changes are allowed.
636 The method should raise errors.OpPrereqError in case something is not
637 fulfilled. Its return value is ignored.
639 This method should also update all parameters to their canonical form if it
640 hasn't been done before.
645 def Exec(self, feedback_fn):
646 """Execute the tasklet.
648 This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in code, or
expected.

"""
653 raise NotImplementedError
656 def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names.
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
662 @param nodes: list of node names or None for all nodes
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
"""
if not nodes:
  raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                               " non-empty list of nodes whose names are to"
                               " be expanded.")
672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673 return utils.NiceSort(wanted)
676 def _GetWantedInstances(lu, instances):
677 """Returns list of checked and expanded instance names.
679 @type lu: L{LogicalUnit}
680 @param lu: the logical unit on whose behalf we execute
681 @type instances: list
682 @param instances: list of instance names or None for all instances
684 @return: the list of instances, sorted
685 @raise errors.OpPrereqError: if the instances parameter is wrong type
686 @raise errors.OpPrereqError: if any of the passed instances is not found
"""
if instances:
  wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
else:
  wanted = utils.NiceSort(lu.cfg.GetInstanceList())

return wanted
696 def _GetUpdatedParams(old_params, update_dict,
697 use_default=True, use_none=False):
698 """Return the new version of a parameter dictionary.
700 @type old_params: dict
701 @param old_params: old parameters
702 @type update_dict: dict
703 @param update_dict: dict containing new parameter values, or
704 constants.VALUE_DEFAULT to reset the parameter to its default
@type use_default: boolean
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
    values as 'to be deleted' values
@type use_none: boolean
@param use_none: whether to recognise C{None} values as 'to be
    deleted' values
713 @return: the new parameter dictionary
716 params_copy = copy.deepcopy(old_params)
717 for key, val in update_dict.iteritems():
718 if ((use_default and val == constants.VALUE_DEFAULT) or
719 (use_none and val is None)):
  try:
    del params_copy[key]
  except KeyError:
    pass
else:
  params_copy[key] = val

return params_copy
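# Behaviour sketch (illustrative values): with use_default=True a
# VALUE_DEFAULT entry deletes the key while other values override it:
#
#   _GetUpdatedParams({"acpi": True, "kernel_path": "/vmlinuz"},
#                     {"acpi": constants.VALUE_DEFAULT,
#                      "serial_console": False})
#   => {"kernel_path": "/vmlinuz", "serial_console": False}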
729 def _CheckOutputFields(static, dynamic, selected):
730 """Checks whether all selected fields are valid.
732 @type static: L{utils.FieldSet}
733 @param static: static fields set
734 @type dynamic: L{utils.FieldSet}
735 @param dynamic: dynamic fields set
"""
f = utils.FieldSet()
f.Extend(static)
f.Extend(dynamic)

delta = f.NonMatching(selected)
744 raise errors.OpPrereqError("Unknown output fields selected: %s"
745 % ",".join(delta), errors.ECODE_INVAL)
748 def _CheckGlobalHvParams(params):
749 """Validates that given hypervisor params are not global ones.
This will ensure that instances don't get customised versions of
global parameters.

"""
used_globals = constants.HVC_GLOBALS.intersection(params)
if used_globals:
  msg = ("The following hypervisor parameters are global and cannot"
758 " be customized at instance level, please modify them at"
759 " cluster level: %s" % utils.CommaJoin(used_globals))
760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
763 def _CheckNodeOnline(lu, node):
764 """Ensure that a given node is online.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @raise errors.OpPrereqError: if the node is offline
771 if lu.cfg.GetNodeInfo(node).offline:
772 raise errors.OpPrereqError("Can't use offline node %s" % node,
776 def _CheckNodeNotDrained(lu, node):
777 """Ensure that a given node is not drained.
779 @param lu: the LU on behalf of which we make the check
780 @param node: the node to check
781 @raise errors.OpPrereqError: if the node is drained
784 if lu.cfg.GetNodeInfo(node).drained:
785 raise errors.OpPrereqError("Can't use drained node %s" % node,
789 def _CheckNodeHasOS(lu, node, os_name, force_variant):
790 """Ensure that a node supports a given OS.
792 @param lu: the LU on behalf of which we make the check
793 @param node: the node to check
794 @param os_name: the OS to query about
795 @param force_variant: whether to ignore variant errors
796 @raise errors.OpPrereqError: if the node is not supporting the OS
799 result = lu.rpc.call_os_get(node, os_name)
result.Raise("OS '%s' not in supported OS list for node %s" %
             (os_name, node),
             prereq=True, ecode=errors.ECODE_INVAL)
803 if not force_variant:
804 _CheckOSVariant(result.payload, os_name)
807 def _RequireFileStorage():
808 """Checks that file storage is enabled.
810 @raise errors.OpPrereqError: when file storage is disabled
813 if not constants.ENABLE_FILE_STORAGE:
814 raise errors.OpPrereqError("File storage disabled at configure time",
818 def _CheckDiskTemplate(template):
819 """Ensure a given disk template is valid.
822 if template not in constants.DISK_TEMPLATES:
823 msg = ("Invalid disk template name '%s', valid templates are: %s" %
824 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
826 if template == constants.DT_FILE:
827 _RequireFileStorage()
831 def _CheckStorageType(storage_type):
832 """Ensure a given storage type is valid.
835 if storage_type not in constants.VALID_STORAGE_TYPES:
836 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
838 if storage_type == constants.ST_FILE:
839 _RequireFileStorage()
843 def _GetClusterDomainSecret():
844 """Reads the cluster domain secret.
847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
851 def _CheckInstanceDown(lu, instance, reason):
852 """Ensure that an instance is not running."""
853 if instance.admin_up:
854 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
855 (instance.name, reason), errors.ECODE_STATE)
857 pnode = instance.primary_node
858 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
859 ins_l.Raise("Can't contact node %s for instance information" % pnode,
860 prereq=True, ecode=errors.ECODE_ENVIRON)
862 if instance.name in ins_l.payload:
863 raise errors.OpPrereqError("Instance %s is running, %s" %
864 (instance.name, reason), errors.ECODE_STATE)
867 def _ExpandItemName(fn, name, kind):
868 """Expand an item name.
870 @param fn: the function to use for expansion
871 @param name: requested item name
872 @param kind: text description ('Node' or 'Instance')
873 @return: the resolved (full) name
874 @raise errors.OpPrereqError: if the item is not found
"""
full_name = fn(name)
if full_name is None:
  raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                             errors.ECODE_NOENT)

return full_name
884 def _ExpandNodeName(cfg, name):
885 """Wrapper over L{_ExpandItemName} for nodes."""
886 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
889 def _ExpandInstanceName(cfg, name):
890 """Wrapper over L{_ExpandItemName} for instance."""
891 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
894 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
895 memory, vcpus, nics, disk_template, disks,
896 bep, hvp, hypervisor_name):
897 """Builds instance related env variables for hooks
899 This builds the hook environment from individual variables.
902 @param name: the name of the instance
903 @type primary_node: string
904 @param primary_node: the name of the instance's primary node
905 @type secondary_nodes: list
906 @param secondary_nodes: list of secondary nodes as strings
907 @type os_type: string
908 @param os_type: the name of the instance's OS
909 @type status: boolean
910 @param status: the should_run status of the instance
912 @param memory: the memory size of the instance
914 @param vcpus: the count of VCPUs the instance has
916 @param nics: list of tuples (ip, mac, mode, link) representing
917 the NICs the instance has
918 @type disk_template: string
919 @param disk_template: the disk template of the instance
921 @param disks: the list of (size, mode) pairs
923 @param bep: the backend parameters for the instance
925 @param hvp: the hypervisor parameters for the instance
926 @type hypervisor_name: string
927 @param hypervisor_name: the hypervisor for the instance
929 @return: the hook environment for this instance
938 "INSTANCE_NAME": name,
939 "INSTANCE_PRIMARY": primary_node,
940 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
941 "INSTANCE_OS_TYPE": os_type,
942 "INSTANCE_STATUS": str_status,
943 "INSTANCE_MEMORY": memory,
944 "INSTANCE_VCPUS": vcpus,
945 "INSTANCE_DISK_TEMPLATE": disk_template,
946 "INSTANCE_HYPERVISOR": hypervisor_name,
}

if nics:
  nic_count = len(nics)
  for idx, (ip, mac, mode, link) in enumerate(nics):
    if ip is None:
      ip = ""
    env["INSTANCE_NIC%d_IP" % idx] = ip
955 env["INSTANCE_NIC%d_MAC" % idx] = mac
956 env["INSTANCE_NIC%d_MODE" % idx] = mode
957 env["INSTANCE_NIC%d_LINK" % idx] = link
958 if mode == constants.NIC_MODE_BRIDGED:
959 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
963 env["INSTANCE_NIC_COUNT"] = nic_count
if disks:
  disk_count = len(disks)
  for idx, (size, mode) in enumerate(disks):
968 env["INSTANCE_DISK%d_SIZE" % idx] = size
969 env["INSTANCE_DISK%d_MODE" % idx] = mode
973 env["INSTANCE_DISK_COUNT"] = disk_count
for source, kind in [(bep, "BE"), (hvp, "HV")]:
  for key, value in source.items():
    env["INSTANCE_%s_%s" % (kind, key)] = value

return env
982 def _NICListToTuple(lu, nics):
983 """Build a list of nic information tuples.
985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986 value in LUQueryInstanceData.
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type nics: list of L{objects.NIC}
991 @param nics: list of nics to convert to hooks tuples
995 cluster = lu.cfg.GetClusterInfo()
hooks_nics = []
for nic in nics:
  ip = nic.ip
  mac = nic.mac
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000 mode = filled_params[constants.NIC_MODE]
1001 link = filled_params[constants.NIC_LINK]
  hooks_nics.append((ip, mac, mode, link))

return hooks_nics
1006 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007 """Builds instance related env variables for hooks from an object.
1009 @type lu: L{LogicalUnit}
1010 @param lu: the logical unit on whose behalf we execute
1011 @type instance: L{objects.Instance}
1012 @param instance: the instance for which we should build the
1014 @type override: dict
1015 @param override: dictionary with key/values that will override
1018 @return: the hook environment dictionary
1021 cluster = lu.cfg.GetClusterInfo()
1022 bep = cluster.FillBE(instance)
1023 hvp = cluster.FillHV(instance)
args = {
  'name': instance.name,
1026 'primary_node': instance.primary_node,
1027 'secondary_nodes': instance.secondary_nodes,
1028 'os_type': instance.os,
1029 'status': instance.admin_up,
1030 'memory': bep[constants.BE_MEMORY],
1031 'vcpus': bep[constants.BE_VCPUS],
1032 'nics': _NICListToTuple(lu, instance.nics),
1033 'disk_template': instance.disk_template,
'disks': [(disk.size, disk.mode) for disk in instance.disks],
'bep': bep,
'hvp': hvp,
'hypervisor_name': instance.hypervisor,
}
if override:
  args.update(override)
1041 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1044 def _AdjustCandidatePool(lu, exceptions):
1045 """Adjust the candidate pool after node operations.
1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
if mod_list:
  lu.LogInfo("Promoted nodes to master candidate role: %s",
1051 utils.CommaJoin(node.name for node in mod_list))
1052 for name in mod_list:
1053 lu.context.ReaddNode(name)
1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
if mc_now > mc_max:
  lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
             (mc_now, mc_max))
1060 def _DecideSelfPromotion(lu, exceptions=None):
1061 """Decide whether I should promote myself as a master candidate.
1064 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066 # the new node will increase mc_max with one, so:
1067 mc_should = min(mc_should + 1, cp_size)
1068 return mc_now < mc_should
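# Worked example: with candidate_pool_size = 10, mc_now = 3 and
# mc_should = 5, the node joining bumps mc_should to min(5 + 1, 10) = 6;
# since 3 < 6 the node decides to promote itself.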
1071 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072 """Check that the brigdes needed by a list of nics exist.
1075 cluster = lu.cfg.GetClusterInfo()
1076 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077 brlist = [params[constants.NIC_LINK] for params in paramslist
1078 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1080 result = lu.rpc.call_bridges_exist(target_node, brlist)
1081 result.Raise("Error checking bridges on destination node '%s'" %
1082 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1085 def _CheckInstanceBridgesExist(lu, instance, node=None):
1086 """Check that the brigdes needed by an instance exist.
1090 node = instance.primary_node
1091 _CheckNicsBridgesExist(lu, instance.nics, node)
1094 def _CheckOSVariant(os_obj, name):
1095 """Check whether an OS name conforms to the os variants specification.
1097 @type os_obj: L{objects.OS}
1098 @param os_obj: OS object to check
1100 @param name: OS name passed by the user, to check for validity
"""
if not os_obj.supported_variants:
  return
variant = objects.OS.GetVariant(name)
if not variant:
  raise errors.OpPrereqError("OS name must include a variant",
                             errors.ECODE_INVAL)
1110 if variant not in os_obj.supported_variants:
1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1114 def _GetNodeInstancesInner(cfg, fn):
1115 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1118 def _GetNodeInstances(cfg, node_name):
1119 """Returns a list of all primary and secondary instances on a node.
1123 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1126 def _GetNodePrimaryInstances(cfg, node_name):
1127 """Returns primary instances on a node.
1130 return _GetNodeInstancesInner(cfg,
1131 lambda inst: node_name == inst.primary_node)
1134 def _GetNodeSecondaryInstances(cfg, node_name):
1135 """Returns secondary instances on a node.
1138 return _GetNodeInstancesInner(cfg,
1139 lambda inst: node_name in inst.secondary_nodes)
1142 def _GetStorageTypeArgs(cfg, storage_type):
1143 """Returns the arguments for a storage type.
1146 # Special case for file storage
1147 if storage_type == constants.ST_FILE:
1148 # storage.FileStorage wants a list of storage directories
  return [[cfg.GetFileStorageDir()]]

return []
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
1158 cfg.SetDiskID(dev, node_name)
1160 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161 result.Raise("Failed to get disk status from node %s" % node_name,
1162 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164 for idx, bdev_status in enumerate(result.payload):
if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
  faulty.append(idx)

return faulty
1171 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172 """Check the sanity of iallocator and node arguments and use the
1173 cluster-wide iallocator if appropriate.
1175 Check that at most one of (iallocator, node) is specified. If none is
1176 specified, then the LU's opcode's iallocator slot is filled with the
1177 cluster-wide default iallocator.
1179 @type iallocator_slot: string
1180 @param iallocator_slot: the name of the opcode iallocator slot
1181 @type node_slot: string
1182 @param node_slot: the name of the opcode target node slot
1185 node = getattr(lu.op, node_slot, None)
1186 iallocator = getattr(lu.op, iallocator_slot, None)
1188 if node is not None and iallocator is not None:
1189 raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1191 elif node is None and iallocator is None:
1192 default_iallocator = lu.cfg.GetDefaultIAllocator()
1193 if default_iallocator:
1194 setattr(lu.op, iallocator_slot, default_iallocator)
1196 raise errors.OpPrereqError("No iallocator or node given and no"
1197 " cluster-wide default iallocator found."
1198 " Please specify either an iallocator or a"
1199 " node, or set a cluster-wide default"
1203 class LUPostInitCluster(LogicalUnit):
1204 """Logical unit for running hooks after cluster initialization.
1207 HPATH = "cluster-init"
1208 HTYPE = constants.HTYPE_CLUSTER
1210 def BuildHooksEnv(self):
"""Build hooks env.

"""
env = {"OP_TARGET": self.cfg.GetClusterName()}
1215 mn = self.cfg.GetMasterNode()
1216 return env, [], [mn]
def Exec(self, feedback_fn):
  """Nothing to do.

  """
  return True
1225 class LUDestroyCluster(LogicalUnit):
1226 """Logical unit for destroying the cluster.
1229 HPATH = "cluster-destroy"
1230 HTYPE = constants.HTYPE_CLUSTER
1232 def BuildHooksEnv(self):
"""Build hooks env.

"""
env = {"OP_TARGET": self.cfg.GetClusterName()}
return env, [], []
1239 def CheckPrereq(self):
1240 """Check prerequisites.
1242 This checks whether the cluster is empty.
1244 Any errors are signaled by raising errors.OpPrereqError.
1247 master = self.cfg.GetMasterNode()
1249 nodelist = self.cfg.GetNodeList()
1250 if len(nodelist) != 1 or nodelist[0] != master:
1251 raise errors.OpPrereqError("There are still %d node(s) in"
1252 " this cluster." % (len(nodelist) - 1),
instancelist = self.cfg.GetInstanceList()
if instancelist:
  raise errors.OpPrereqError("There are still %d instance(s) in"
1257 " this cluster." % len(instancelist),
1260 def Exec(self, feedback_fn):
1261 """Destroys the cluster.
1264 master = self.cfg.GetMasterNode()
1266 # Run post hooks on master node before it's removed
1267 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
try:
  hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
except: # pylint: disable-msg=W0702
  self.LogWarning("Errors occurred running hooks on %s" % master)
1274 result = self.rpc.call_node_stop_master(master, False)
1275 result.Raise("Could not disable the master role")
1280 def _VerifyCertificate(filename):
1281 """Verifies a certificate for LUVerifyCluster.
1283 @type filename: string
1284 @param filename: Path to PEM file
"""
try:
  cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                         utils.ReadFile(filename))
1289 utils.ReadFile(filename))
1290 except Exception, err: # pylint: disable-msg=W0703
1291 return (LUVerifyCluster.ETYPE_ERROR,
1292 "Failed to load X509 certificate %s: %s" % (filename, err))
(errcode, msg) = \
  utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                              constants.SSL_CERT_EXPIRATION_ERROR)

if msg:
  fnamemsg = "While verifying %s: %s" % (filename, msg)
else:
  fnamemsg = None

if errcode is None:
  return (None, fnamemsg)
1305 elif errcode == utils.CERT_WARNING:
1306 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1307 elif errcode == utils.CERT_ERROR:
1308 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1310 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1313 class LUVerifyCluster(LogicalUnit):
1314 """Verifies the cluster status.
1317 HPATH = "cluster-verify"
1318 HTYPE = constants.HTYPE_CLUSTER
1320 ("skip_checks", _EmptyList,
1321 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1322 ("verbose", False, _TBool),
1323 ("error_codes", False, _TBool),
1324 ("debug_simulate_errors", False, _TBool),
1328 TCLUSTER = "cluster"
TNODE = "node"
TINSTANCE = "instance"
1332 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1333 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1334 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1335 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1336 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1339 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1340 ENODEDRBD = (TNODE, "ENODEDRBD")
1341 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1342 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1343 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1344 ENODEHV = (TNODE, "ENODEHV")
1345 ENODELVM = (TNODE, "ENODELVM")
1346 ENODEN1 = (TNODE, "ENODEN1")
1347 ENODENET = (TNODE, "ENODENET")
1348 ENODEOS = (TNODE, "ENODEOS")
1349 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1350 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1351 ENODERPC = (TNODE, "ENODERPC")
1352 ENODESSH = (TNODE, "ENODESSH")
1353 ENODEVERSION = (TNODE, "ENODEVERSION")
1354 ENODESETUP = (TNODE, "ENODESETUP")
1355 ENODETIME = (TNODE, "ENODETIME")
1357 ETYPE_FIELD = "code"
1358 ETYPE_ERROR = "ERROR"
1359 ETYPE_WARNING = "WARNING"
1361 class NodeImage(object):
1362 """A class representing the logical and physical status of a node.
1365 @ivar name: the node name to which this object refers
1366 @ivar volumes: a structure as returned from
1367 L{ganeti.backend.GetVolumeList} (runtime)
1368 @ivar instances: a list of running instances (runtime)
1369 @ivar pinst: list of configured primary instances (config)
1370 @ivar sinst: list of configured secondary instances (config)
@ivar sbp: dictionary of {primary-node: list of instances} for all
    instances for which this node is secondary (config)
1373 @ivar mfree: free memory, as reported by hypervisor (runtime)
1374 @ivar dfree: free disk, as reported by the node (runtime)
1375 @ivar offline: the offline status (config)
1376 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call failed (overall,
    not whether the individual keys were correct) (runtime)
1379 @type lvm_fail: boolean
1380 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1381 @type hyp_fail: boolean
1382 @ivar hyp_fail: whether the RPC call didn't return the instance list
1383 @type ghost: boolean
1384 @ivar ghost: whether this is a known node or not (config)
1385 @type os_fail: boolean
1386 @ivar os_fail: whether the RPC call didn't return valid OS data
1388 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1391 def __init__(self, offline=False, name=None):
1400 self.offline = offline
1401 self.rpc_fail = False
1402 self.lvm_fail = False
1403 self.hyp_fail = False
self.ghost = False
self.os_fail = False
self.oslist = {}
1408 def ExpandNames(self):
1409 self.needed_locks = {
1410 locking.LEVEL_NODE: locking.ALL_SET,
locking.LEVEL_INSTANCE: locking.ALL_SET,
}
1413 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1415 def _Error(self, ecode, item, msg, *args, **kwargs):
1416 """Format an error message.
1418 Based on the opcode's error_codes parameter, either format a
1419 parseable error code, or a simpler error string.
1421 This must be called only from Exec and functions called from Exec.
ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
itype, etxt = ecode
# first complete the msg
if args:
  msg = msg % args
# then format the whole message
1430 if self.op.error_codes:
1431 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
else:
  if item:
    item = " " + str(item)
  else:
    item = ""
  msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1438 # and finally report it via the feedback_fn
1439 self._feedback_fn(" - %s" % msg)
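# With error_codes enabled, messages come out as parseable
# colon-separated tuples, e.g. (illustrative):
#
#   ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
#
# whereas the default format of the same report would be:
#
#   ERROR: node node1.example.com: unable to check volume groups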
1441 def _ErrorIf(self, cond, *args, **kwargs):
1442 """Log an error message if the passed condition is True.
1445 cond = bool(cond) or self.op.debug_simulate_errors
if cond:
  self._Error(*args, **kwargs)
1448 # do not mark the operation as failed for WARN cases only
1449 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1450 self.bad = self.bad or cond
1452 def _VerifyNode(self, ninfo, nresult):
1453 """Perform some basic validation on data returned from a node.
1455 - check the result data structure is well formed and has all the
1457 - check ganeti version
1459 @type ninfo: L{objects.Node}
1460 @param ninfo: the node to check
1461 @param nresult: the results from the node
1463 @return: whether overall this call was successful (and we can expect
reasonable values in the response)

"""
node = ninfo.name
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1470 # main result, nresult should be a non-empty dict
1471 test = not nresult or not isinstance(nresult, dict)
1472 _ErrorIf(test, self.ENODERPC, node,
1473 "unable to verify node: no data returned")
1477 # compares ganeti version
1478 local_version = constants.PROTOCOL_VERSION
1479 remote_version = nresult.get("version", None)
1480 test = not (remote_version and
1481 isinstance(remote_version, (list, tuple)) and
1482 len(remote_version) == 2)
1483 _ErrorIf(test, self.ENODERPC, node,
1484 "connection to node returned invalid data")
1488 test = local_version != remote_version[0]
1489 _ErrorIf(test, self.ENODEVERSION, node,
1490 "incompatible protocol versions: master %s,"
1491 " node %s", local_version, remote_version[0])
1495 # node seems compatible, we can actually try to look into its results
1497 # full package version
1498 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1499 self.ENODEVERSION, node,
1500 "software version mismatch: master %s, node %s",
1501 constants.RELEASE_VERSION, remote_version[1],
1502 code=self.ETYPE_WARNING)
1504 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1505 if isinstance(hyp_result, dict):
1506 for hv_name, hv_result in hyp_result.iteritems():
1507 test = hv_result is not None
1508 _ErrorIf(test, self.ENODEHV, node,
1509 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1512 test = nresult.get(constants.NV_NODESETUP,
1513 ["Missing NODESETUP results"])
_ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
         "; ".join(test))

return True
1519 def _VerifyNodeTime(self, ninfo, nresult,
1520 nvinfo_starttime, nvinfo_endtime):
1521 """Check the node time.
1523 @type ninfo: L{objects.Node}
1524 @param ninfo: the node to check
1525 @param nresult: the remote results for the node
1526 @param nvinfo_starttime: the start time of the RPC call
1527 @param nvinfo_endtime: the end time of the RPC call
"""
node = ninfo.name
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1533 ntime = nresult.get(constants.NV_TIME, None)
try:
  ntime_merged = utils.MergeTime(ntime)
except (ValueError, TypeError):
  _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
  return
1540 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1541 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1542 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
  ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
else:
  ntime_diff = None

_ErrorIf(ntime_diff is not None, self.ENODETIME, node,
         "Node time diverges by at least %s from master node time",
         ntime_diff)
1551 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1552 """Check the node time.
1554 @type ninfo: L{objects.Node}
1555 @param ninfo: the node to check
1556 @param nresult: the remote results for the node
1557 @param vg_name: the configured VG name
"""
node = ninfo.name
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1566 # checks vg existence and size > 20G
vglist = nresult.get(constants.NV_VGLIST, None)
test = vglist is None
_ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
if not test:
  vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                        constants.MIN_VG_SIZE)
  _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1576 pvlist = nresult.get(constants.NV_PVLIST, None)
1577 test = pvlist is None
1578 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
if not test:
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  # use on the PV)
  for _, pvname, owner_vg in pvlist:
1584 test = ":" in pvname
1585 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1586 " '%s' of VG '%s'", pvname, owner_vg)
1588 def _VerifyNodeNetwork(self, ninfo, nresult):
1589 """Check the node time.
1591 @type ninfo: L{objects.Node}
1592 @param ninfo: the node to check
1593 @param nresult: the remote results for the node
"""
node = ninfo.name
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1599 test = constants.NV_NODELIST not in nresult
1600 _ErrorIf(test, self.ENODESSH, node,
1601 "node hasn't returned node ssh connectivity data")
1603 if nresult[constants.NV_NODELIST]:
1604 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1605 _ErrorIf(True, self.ENODESSH, node,
1606 "ssh communication with node '%s': %s", a_node, a_msg)
1608 test = constants.NV_NODENETTEST not in nresult
1609 _ErrorIf(test, self.ENODENET, node,
1610 "node hasn't returned node tcp connectivity data")
1612 if nresult[constants.NV_NODENETTEST]:
nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
for anode in nlist:
  _ErrorIf(True, self.ENODENET, node,
1616 "tcp communication with node '%s': %s",
1617 anode, nresult[constants.NV_NODENETTEST][anode])
1619 test = constants.NV_MASTERIP not in nresult
1620 _ErrorIf(test, self.ENODENET, node,
1621 "node hasn't returned node master IP reachability data")
1623 if not nresult[constants.NV_MASTERIP]:
1624 if node == self.master_node:
1625 msg = "the master node cannot reach the master IP (not configured?)"
1627 msg = "cannot reach the master IP"
1628 _ErrorIf(True, self.ENODENET, node, msg)
1631 def _VerifyInstance(self, instance, instanceconfig, node_image):
1632 """Verify an instance.
1634 This function checks to see if the required block devices are
1635 available on the instance's node.
1638 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1639 node_current = instanceconfig.primary_node
1641 node_vol_should = {}
1642 instanceconfig.MapLVsByNode(node_vol_should)
1644 for node in node_vol_should:
1645 n_img = node_image[node]
if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
  # ignore missing volumes on offline or broken nodes
  continue
for volume in node_vol_should[node]:
1650 test = volume not in n_img.volumes
1651 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1652 "volume %s missing on node %s", volume, node)
1654 if instanceconfig.admin_up:
1655 pri_img = node_image[node_current]
1656 test = instance not in pri_img.instances and not pri_img.offline
1657 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1658 "instance not running on its primary node %s",
1661 for node, n_img in node_image.items():
if node != node_current:
1663 test = instance in n_img.instances
1664 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1665 "instance should not run on node %s", node)
1667 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1668 """Verify if there are any unknown volumes in the cluster.
1670 The .os, .swap and backup volumes are ignored. All other volumes are
1671 reported as unknown.
1673 @type reserved: L{ganeti.utils.FieldSet}
1674 @param reserved: a FieldSet of reserved volume names
1677 for node, n_img in node_image.items():
if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
  # skip non-healthy nodes
  continue
for volume in n_img.volumes:
1682 test = ((node not in node_vol_should or
1683 volume not in node_vol_should[node]) and
1684 not reserved.Matches(volume))
1685 self._ErrorIf(test, self.ENODEORPHANLV, node,
1686 "volume %s is unknown", volume)
1688 def _VerifyOrphanInstances(self, instancelist, node_image):
1689 """Verify the list of running instances.
1691 This checks what instances are running but unknown to the cluster.
1694 for node, n_img in node_image.items():
1695 for o_inst in n_img.instances:
1696 test = o_inst not in instancelist
1697 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1698 "instance %s on node %s should not exist", o_inst, node)
1700 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1701 """Verify N+1 Memory Resilience.
1703 Check that if one single node dies we can still start all the
1704 instances it was primary for.
1707 for node, n_img in node_image.items():
1708 # This code checks that every node which is now listed as
1709 # secondary has enough memory to host all instances it is
1710 # supposed to should a single other node in the cluster fail.
1711 # FIXME: not ready for failover to an arbitrary node
1712 # FIXME: does not support file-backed instances
1713 # WARNING: we currently take into account down instances as well
1714 # as up ones, considering that even if they're down someone
1715 # might want to start them even in the event of a node failure.
for prinode, instances in n_img.sbp.items():
  needed_mem = 0
  for instance in instances:
1719 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1720 if bep[constants.BE_AUTO_BALANCE]:
1721 needed_mem += bep[constants.BE_MEMORY]
1722 test = n_img.mfree < needed_mem
self._ErrorIf(test, self.ENODEN1, node,
              "not enough memory to accommodate instance failovers"
              " should peer node %s fail", prinode)
def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                     master_files):
1729 """Verifies and computes the node required file checksums.
1731 @type ninfo: L{objects.Node}
1732 @param ninfo: the node to check
1733 @param nresult: the remote results for the node
1734 @param file_list: required list of files
1735 @param local_cksum: dictionary of local files and their checksums
1736 @param master_files: list of files that only masters should have
"""
node = ninfo.name
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1742 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1743 test = not isinstance(remote_cksum, dict)
1744 _ErrorIf(test, self.ENODEFILECHECK, node,
1745 "node hasn't returned file checksum data")
1749 for file_name in file_list:
1750 node_is_mc = ninfo.master_candidate
1751 must_have = (file_name not in master_files) or node_is_mc
1753 test1 = file_name not in remote_cksum
1755 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1757 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1758 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1759 "file '%s' missing", file_name)
1760 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1761 "file '%s' has wrong checksum", file_name)
1762 # not candidate and this is not a must-have file
1763 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1764 "file '%s' should not exist on non master"
1765 " candidates (and the file is outdated)", file_name)
1766 # all good, except non-master/non-must have combination
1767 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1768 "file '%s' should not exist"
1769 " on non master candidates", file_name)
def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                    drbd_map):
1773 """Verifies and the node DRBD status.
1775 @type ninfo: L{objects.Node}
1776 @param ninfo: the node to check
1777 @param nresult: the remote results for the node
1778 @param instanceinfo: the dict of instances
1779 @param drbd_helper: the configured DRBD usermode helper
1780 @param drbd_map: the DRBD map as returned by
1781 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
"""
node = ninfo.name
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1788 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
test = (helper_result is None)
1790 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1791 "no drbd usermode helper returned")
status, payload = helper_result
test = not status
_ErrorIf(test, self.ENODEDRBDHELPER, node,
1796 "drbd usermode helper check unsuccessful: %s", payload)
1797 test = status and (payload != drbd_helper)
1798 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1799 "wrong drbd usermode helper: %s", payload)
# compute the DRBD minors
node_drbd = {}
for minor, instance in drbd_map[node].items():
1804 test = instance not in instanceinfo
1805 _ErrorIf(test, self.ECLUSTERCFG, None,
1806 "ghost instance '%s' in temporary DRBD map", instance)
1807 # ghost instance should not be running, but otherwise we
1808 # don't give double warnings (both ghost instance and
1809 # unallocated minor in use)
if test:
  node_drbd[minor] = (instance, False)
else:
  instance = instanceinfo[instance]
  node_drbd[minor] = (instance.name, instance.admin_up)
1816 # and now check them
1817 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1818 test = not isinstance(used_minors, (tuple, list))
1819 _ErrorIf(test, self.ENODEDRBD, node,
1820 "cannot parse drbd status file: %s", str(used_minors))
if test:
  # we cannot check drbd status
  return
1825 for minor, (iname, must_exist) in node_drbd.items():
1826 test = minor not in used_minors and must_exist
1827 _ErrorIf(test, self.ENODEDRBD, node,
1828 "drbd minor %d of instance %s is not active", minor, iname)
1829 for minor in used_minors:
1830 test = minor not in node_drbd
1831 _ErrorIf(test, self.ENODEDRBD, node,
1832 "unallocated drbd minor %d is in use", minor)
1834 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1835 """Builds the node OS structures.
1837 @type ninfo: L{objects.Node}
1838 @param ninfo: the node to check
1839 @param nresult: the remote results for the node
1840 @param nimg: the node image object
"""
node = ninfo.name
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1846 remote_os = nresult.get(constants.NV_OSLIST, None)
1847 test = (not isinstance(remote_os, list) or
1848 not compat.all(isinstance(v, list) and len(v) == 7
1849 for v in remote_os))
1851 _ErrorIf(test, self.ENODEOS, node,
1852 "node hasn't returned valid OS data")
1861 for (name, os_path, status, diagnose,
1862 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
if name not in os_dict:
  os_dict[name] = []
1867 # parameters is a list of lists instead of list of tuples due to
1868 # JSON lacking a real tuple type, fix it:
1869 parameters = [tuple(v) for v in parameters]
1870 os_dict[name].append((os_path, status, diagnose,
1871 set(variants), set(parameters), set(api_ver)))
1873 nimg.oslist = os_dict
1875 def _VerifyNodeOS(self, ninfo, nimg, base):
1876 """Verifies the node OS list.
1878 @type ninfo: L{objects.Node}
1879 @param ninfo: the node to check
1880 @param nimg: the node image object
1881 @param base: the 'template' node we match against (e.g. from the master)
"""
node = ninfo.name
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1887 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1889 for os_name, os_data in nimg.oslist.items():
1890 assert os_data, "Empty OS status for OS %s?!" % os_name
1891 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1892 _ErrorIf(not f_status, self.ENODEOS, node,
1893 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1894 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1895 "OS '%s' has multiple entries (first one shadows the rest): %s",
1896 os_name, utils.CommaJoin([v[0] for v in os_data]))
# this will be caught in the backend too
1898 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1899 and not f_var, self.ENODEOS, node,
1900 "OS %s with API at least %d does not declare any variant",
1901 os_name, constants.OS_API_V15)
1902 # comparisons with the 'base' image
1903 test = os_name not in base.oslist
1904 _ErrorIf(test, self.ENODEOS, node,
1905 "Extra OS %s not present on reference node (%s)",
1909 assert base.oslist[os_name], "Base node has empty OS status?"
1910 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
if not b_status:
  # base OS is invalid, skipping
  continue
1914 for kind, a, b in [("API version", f_api, b_api),
1915 ("variants list", f_var, b_var),
1916 ("parameters", f_param, b_param)]:
1917 _ErrorIf(a != b, self.ENODEOS, node,
1918 "OS %s %s differs from reference node %s: %s vs. %s",
1919 kind, os_name, base.name,
1920 utils.CommaJoin(a), utils.CommaJoin(b))
1922 # check any missing OSes
1923 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1924 _ErrorIf(missing, self.ENODEOS, node,
1925 "OSes present on reference node %s but missing on this node: %s",
1926 base.name, utils.CommaJoin(missing))
1928 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1929 """Verifies and updates the node volume data.
1931 This function will update a L{NodeImage}'s internal structures
1932 with data from the remote call.
1934 @type ninfo: L{objects.Node}
1935 @param ninfo: the node to check
1936 @param nresult: the remote results for the node
1937 @param nimg: the node image object
1938 @param vg_name: the configured VG name
"""
node = ninfo.name
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

nimg.lvm_fail = True
1945 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
if vg_name is None:
  pass
elif isinstance(lvdata, basestring):
1949 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1950 utils.SafeEncode(lvdata))
1951 elif not isinstance(lvdata, dict):
1952 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
else:
  nimg.volumes = lvdata
  nimg.lvm_fail = False
1957 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1958 """Verifies and updates the node instance list.
1960 If the listing was successful, then updates this node's instance
list. Otherwise, it marks the RPC call as failed for the instance
list.
1964 @type ninfo: L{objects.Node}
1965 @param ninfo: the node to check
1966 @param nresult: the remote results for the node
1967 @param nimg: the node image object
1970 idata = nresult.get(constants.NV_INSTANCELIST, None)
1971 test = not isinstance(idata, list)
1972 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1973 " (instancelist): %s", utils.SafeEncode(str(idata)))
1975 nimg.hyp_fail = True
1977 nimg.instances = idata
1979 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1980 """Verifies and computes a node information map
1982 @type ninfo: L{objects.Node}
1983 @param ninfo: the node to check
1984 @param nresult: the remote results for the node
1985 @param nimg: the node image object
1986 @param vg_name: the configured VG name
1990 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1992 # try to read free memory (from the hypervisor)
1993 hv_info = nresult.get(constants.NV_HVINFO, None)
1994 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1995 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1998 nimg.mfree = int(hv_info["memory_free"])
1999 except (ValueError, TypeError):
2000 _ErrorIf(True, self.ENODERPC, node,
2001 "node returned invalid nodeinfo, check hypervisor")
2003 # FIXME: devise a free space model for file based instances as well
2004 if vg_name is not None:
2005 test = (constants.NV_VGLIST not in nresult or
2006 vg_name not in nresult[constants.NV_VGLIST])
2007 _ErrorIf(test, self.ENODELVM, node,
2008 "node didn't return data for the volume group '%s'"
2009 " - it is either missing or broken", vg_name)
2012 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2013 except (ValueError, TypeError):
2014 _ErrorIf(True, self.ENODERPC, node,
2015 "node returned invalid LVM info, check LVM status")
2017 def BuildHooksEnv(self):
    Cluster-Verify hooks are run in the post phase only; if they fail,
    their output is logged in the verify output and the verification
    fails.
2024 all_nodes = self.cfg.GetNodeList()
2026 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2028 for node in self.cfg.GetAllNodesInfo().values():
2029 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2031 return env, [], all_nodes
2033 def Exec(self, feedback_fn):
2034 """Verify integrity of cluster, performing various test on nodes.
2038 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2039 verbose = self.op.verbose
2040 self._feedback_fn = feedback_fn
2041 feedback_fn("* Verifying global settings")
2042 for msg in self.cfg.VerifyConfig():
2043 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2045 # Check the cluster certificates
2046 for cert_filename in constants.ALL_CERT_FILES:
2047 (errcode, msg) = _VerifyCertificate(cert_filename)
2048 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2050 vg_name = self.cfg.GetVGName()
2051 drbd_helper = self.cfg.GetDRBDHelper()
2052 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2053 cluster = self.cfg.GetClusterInfo()
2054 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2055 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2056 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2057 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2058 for iname in instancelist)
2059 i_non_redundant = [] # Non redundant instances
2060 i_non_a_balanced = [] # Non auto-balanced instances
2061 n_offline = 0 # Count of offline nodes
2062 n_drained = 0 # Count of nodes being drained
2063 node_vol_should = {}
2065 # FIXME: verify OS list
2066 # do local checksums
2067 master_files = [constants.CLUSTER_CONF_FILE]
2068 master_node = self.master_node = self.cfg.GetMasterNode()
2069 master_ip = self.cfg.GetMasterIP()
2071 file_names = ssconf.SimpleStore().GetFileList()
2072 file_names.extend(constants.ALL_CERT_FILES)
2073 file_names.extend(master_files)
2074 if cluster.modify_etc_hosts:
2075 file_names.append(constants.ETC_HOSTS)
2077 local_checksums = utils.FingerprintFiles(file_names)
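    # these local fingerprints are later compared against the checksums
    # reported by each node in _VerifyNodeFiles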
2079 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2080 node_verify_param = {
2081 constants.NV_FILELIST: file_names,
2082 constants.NV_NODELIST: [node.name for node in nodeinfo
2083 if not node.offline],
2084 constants.NV_HYPERVISOR: hypervisors,
2085 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2086 node.secondary_ip) for node in nodeinfo
2087 if not node.offline],
2088 constants.NV_INSTANCELIST: hypervisors,
2089 constants.NV_VERSION: None,
2090 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2091 constants.NV_NODESETUP: None,
2092 constants.NV_TIME: None,
2093 constants.NV_MASTERIP: (master_node, master_ip),
2094 constants.NV_OSLIST: None,
2097 if vg_name is not None:
2098 node_verify_param[constants.NV_VGLIST] = None
2099 node_verify_param[constants.NV_LVLIST] = vg_name
2100 node_verify_param[constants.NV_PVLIST] = [vg_name]
2101 node_verify_param[constants.NV_DRBDLIST] = None
2104 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2106 # Build our expected cluster state
2107 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2109 for node in nodeinfo)
2111 for instance in instancelist:
2112 inst_config = instanceinfo[instance]
2114 for nname in inst_config.all_nodes:
2115 if nname not in node_image:
2117 gnode = self.NodeImage(name=nname)
2119 node_image[nname] = gnode
2121 inst_config.MapLVsByNode(node_vol_should)
2123 pnode = inst_config.primary_node
2124 node_image[pnode].pinst.append(instance)
2126 for snode in inst_config.secondary_nodes:
2127 nimg = node_image[snode]
2128 nimg.sinst.append(instance)
2129 if pnode not in nimg.sbp:
2130 nimg.sbp[pnode] = []
2131 nimg.sbp[pnode].append(instance)
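    # nimg.sbp thus maps each primary node to the list of instances that
    # use the node behind nimg as their secondary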
2133 # At this point, we have the in-memory data structures complete,
2134 # except for the runtime information, which we'll gather next
    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
2140 nvinfo_starttime = time.time()
2141 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2142 self.cfg.GetClusterName())
2143 nvinfo_endtime = time.time()
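    # the [nvinfo_starttime, nvinfo_endtime] window is passed to
    # _VerifyNodeTime below when checking the nodes' clocks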
2145 all_drbd_map = self.cfg.ComputeDRBDMap()
2147 feedback_fn("* Verifying node status")
2151 for node_i in nodeinfo:
2153 nimg = node_image[node]
2157 feedback_fn("* Skipping offline node %s" % (node,))
2161 if node == master_node:
2163 elif node_i.master_candidate:
2164 ntype = "master candidate"
2165 elif node_i.drained:
2171 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2173 msg = all_nvinfo[node].fail_msg
2174 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2176 nimg.rpc_fail = True
2179 nresult = all_nvinfo[node].payload
2181 nimg.call_ok = self._VerifyNode(node_i, nresult)
2182 self._VerifyNodeNetwork(node_i, nresult)
2183 self._VerifyNodeLVM(node_i, nresult, vg_name)
2184 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2186 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2188 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2190 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2191 self._UpdateNodeInstances(node_i, nresult, nimg)
2192 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2193 self._UpdateNodeOS(node_i, nresult, nimg)
2194 if not nimg.os_fail:
2195 if refos_img is None:
2197 self._VerifyNodeOS(node_i, nimg, refos_img)
2199 feedback_fn("* Verifying instance status")
2200 for instance in instancelist:
2202 feedback_fn("* Verifying instance %s" % instance)
2203 inst_config = instanceinfo[instance]
2204 self._VerifyInstance(instance, inst_config, node_image)
2205 inst_nodes_offline = []
2207 pnode = inst_config.primary_node
2208 pnode_img = node_image[pnode]
2209 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2210 self.ENODERPC, pnode, "instance %s, connection to"
2211 " primary node failed", instance)
2213 if pnode_img.offline:
2214 inst_nodes_offline.append(pnode)
      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
2220 # FIXME: does not support file-backed instances
2221 if not inst_config.secondary_nodes:
2222 i_non_redundant.append(instance)
2223 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2224 instance, "instance has multiple secondary nodes: %s",
2225 utils.CommaJoin(inst_config.secondary_nodes),
2226 code=self.ETYPE_WARNING)
2228 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2229 i_non_a_balanced.append(instance)
2231 for snode in inst_config.secondary_nodes:
2232 s_img = node_image[snode]
2233 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2234 "instance %s, connection to secondary node failed", instance)
2237 inst_nodes_offline.append(snode)
2239 # warn that the instance lives on offline nodes
2240 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2241 "instance lives on offline node(s) %s",
2242 utils.CommaJoin(inst_nodes_offline))
2243 # ... or ghost nodes
2244 for node in inst_config.all_nodes:
2245 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2246 "instance lives on ghost node %s", node)
2248 feedback_fn("* Verifying orphan volumes")
2249 reserved = utils.FieldSet(*cluster.reserved_lvs)
2250 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2252 feedback_fn("* Verifying orphan instances")
2253 self._VerifyOrphanInstances(instancelist, node_image)
2255 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2256 feedback_fn("* Verifying N+1 Memory redundancy")
2257 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2259 feedback_fn("* Other Notes")
2261 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2262 % len(i_non_redundant))
2264 if i_non_a_balanced:
2265 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2266 % len(i_non_a_balanced))
2269 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2272 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2276 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2277 """Analyze the post-hooks' result
    This method analyzes the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.
2282 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2283 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2284 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
2286 @param lu_result: previous Exec result
2287 @return: the new Exec result, based on the previous result
    # We only really run POST phase hooks, and are only interested in
    # their results
2293 if phase == constants.HOOKS_PHASE_POST:
2294 # Used to change hooks' output to proper indentation
2295 indent_re = re.compile('^', re.M)
2296 feedback_fn("* Hooks Results")
2297 assert hooks_results, "invalid result from hooks"
2299 for node_name in hooks_results:
2300 res = hooks_results[node_name]
2302 test = msg and not res.offline
2303 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2304 "Communication failure in hooks execution: %s", msg)
2305 if res.offline or msg:
2306 # No need to investigate payload if node is offline or gave an error.
2307 # override manually lu_result here as _ErrorIf only
2308 # overrides self.bad
2311 for script, hkr, output in res.payload:
2312 test = hkr == constants.HKR_FAIL
2313 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2314 "Script %s failed, output:", script)
2316 output = indent_re.sub(' ', output)
2317 feedback_fn("%s" % output)
2323 class LUVerifyDisks(NoHooksLU):
2324 """Verifies the cluster disks status.
2329 def ExpandNames(self):
2330 self.needed_locks = {
2331 locking.LEVEL_NODE: locking.ALL_SET,
2332 locking.LEVEL_INSTANCE: locking.ALL_SET,
2334 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2336 def Exec(self, feedback_fn):
2337 """Verify integrity of cluster disks.
2339 @rtype: tuple of three items
2340 @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
2345 result = res_nodes, res_instances, res_missing = {}, [], {}
2347 vg_name = self.cfg.GetVGName()
2348 nodes = utils.NiceSort(self.cfg.GetNodeList())
2349 instances = [self.cfg.GetInstanceInfo(name)
2350 for name in self.cfg.GetInstanceList()]
2353 for inst in instances:
2355 if (not inst.admin_up or
2356 inst.disk_template not in constants.DTS_NET_MIRROR):
2358 inst.MapLVsByNode(inst_lvs)
2359 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2360 for node, vol_list in inst_lvs.iteritems():
2361 for vol in vol_list:
2362 nv_dict[(node, vol)] = inst
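    # nv_dict now maps e.g. ("node1.example.com", "xenvg/lv0") (hypothetical
    # values) to the instance owning that volume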
2367 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2371 node_res = node_lvs[node]
2372 if node_res.offline:
2374 msg = node_res.fail_msg
2376 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2377 res_nodes[node] = msg
2380 lvs = node_res.payload
2381 for lv_name, (_, _, lv_online) in lvs.items():
2382 inst = nv_dict.pop((node, lv_name), None)
2383 if (not lv_online and inst is not None
2384 and inst.name not in res_instances):
2385 res_instances.append(inst.name)
    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
2389 for key, inst in nv_dict.iteritems():
2390 if inst.name not in res_missing:
2391 res_missing[inst.name] = []
2392 res_missing[inst.name].append(key)
2397 class LURepairDiskSizes(NoHooksLU):
2398 """Verifies the cluster disks sizes.
2401 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2404 def ExpandNames(self):
2405 if self.op.instances:
2406 self.wanted_names = []
2407 for name in self.op.instances:
2408 full_name = _ExpandInstanceName(self.cfg, name)
2409 self.wanted_names.append(full_name)
2410 self.needed_locks = {
2411 locking.LEVEL_NODE: [],
2412 locking.LEVEL_INSTANCE: self.wanted_names,
2414 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2416 self.wanted_names = None
2417 self.needed_locks = {
2418 locking.LEVEL_NODE: locking.ALL_SET,
2419 locking.LEVEL_INSTANCE: locking.ALL_SET,
2421 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2423 def DeclareLocks(self, level):
2424 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2425 self._LockInstancesNodes(primary_only=True)
2427 def CheckPrereq(self):
2428 """Check prerequisites.
2430 This only checks the optional instance list against the existing names.
2433 if self.wanted_names is None:
2434 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2436 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2437 in self.wanted_names]
2439 def _EnsureChildSizes(self, disk):
2440 """Ensure children of the disk have the needed disk size.
    This is valid mainly for DRBD8 and fixes an issue where the
    children have a smaller disk size than the parent.
2445 @param disk: an L{ganeti.objects.Disk} object
2448 if disk.dev_type == constants.LD_DRBD8:
2449 assert disk.children, "Empty children for DRBD8?"
2450 fchild = disk.children[0]
2451 mismatch = fchild.size < disk.size
2453 self.LogInfo("Child disk has size %d, parent %d, fixing",
2454 fchild.size, disk.size)
2455 fchild.size = disk.size
2457 # and we recurse on this child only, not on the metadev
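      # (the return value is True if this disk or any descendant had to
      # be resized)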
2458 return self._EnsureChildSizes(fchild) or mismatch
2462 def Exec(self, feedback_fn):
2463 """Verify the size of cluster disks.
2466 # TODO: check child disks too
2467 # TODO: check differences in size between primary/secondary nodes
2469 for instance in self.wanted_instances:
2470 pnode = instance.primary_node
2471 if pnode not in per_node_disks:
2472 per_node_disks[pnode] = []
2473 for idx, disk in enumerate(instance.disks):
2474 per_node_disks[pnode].append((instance, idx, disk))
2477 for node, dskl in per_node_disks.items():
2478 newl = [v[2].Copy() for v in dskl]
2480 self.cfg.SetDiskID(dsk, node)
2481 result = self.rpc.call_blockdev_getsizes(node, newl)
2483 self.LogWarning("Failure in blockdev_getsizes call to node"
2484 " %s, ignoring", node)
2486 if len(result.data) != len(dskl):
2487 self.LogWarning("Invalid result from node %s, ignoring node results",
2490 for ((instance, idx, disk), size) in zip(dskl, result.data):
2492 self.LogWarning("Disk %d of instance %s did not return size"
2493 " information, ignoring", idx, instance.name)
2495 if not isinstance(size, (int, long)):
2496 self.LogWarning("Disk %d of instance %s did not return valid"
2497 " size information, ignoring", idx, instance.name)
2500 if size != disk.size:
2501 self.LogInfo("Disk %d of instance %s has mismatched size,"
2502 " correcting: recorded %d, actual %d", idx,
2503 instance.name, disk.size, size)
2505 self.cfg.Update(instance, feedback_fn)
2506 changed.append((instance.name, idx, size))
2507 if self._EnsureChildSizes(disk):
2508 self.cfg.Update(instance, feedback_fn)
2509 changed.append((instance.name, idx, disk.size))
2513 class LURenameCluster(LogicalUnit):
2514 """Rename the cluster.
2517 HPATH = "cluster-rename"
2518 HTYPE = constants.HTYPE_CLUSTER
2519 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2521 def BuildHooksEnv(self):
2526 "OP_TARGET": self.cfg.GetClusterName(),
2527 "NEW_NAME": self.op.name,
2529 mn = self.cfg.GetMasterNode()
2530 all_nodes = self.cfg.GetNodeList()
2531 return env, [mn], all_nodes
2533 def CheckPrereq(self):
2534 """Verify that the passed name is a valid one.
2537 hostname = netutils.GetHostname(name=self.op.name,
2538 family=self.cfg.GetPrimaryIPFamily())
2540 new_name = hostname.name
2541 self.ip = new_ip = hostname.ip
2542 old_name = self.cfg.GetClusterName()
2543 old_ip = self.cfg.GetMasterIP()
2544 if new_name == old_name and new_ip == old_ip:
2545 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2546 " cluster has changed",
2548 if new_ip != old_ip:
2549 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2550 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2551 " reachable on the network" %
2552 new_ip, errors.ECODE_NOTUNIQUE)
2554 self.op.name = new_name
2556 def Exec(self, feedback_fn):
2557 """Rename the cluster.
2560 clustername = self.op.name
    # shut down the master IP
2564 master = self.cfg.GetMasterNode()
2565 result = self.rpc.call_node_stop_master(master, False)
2566 result.Raise("Could not disable the master role")
2569 cluster = self.cfg.GetClusterInfo()
2570 cluster.cluster_name = clustername
2571 cluster.master_ip = ip
2572 self.cfg.Update(cluster, feedback_fn)
2574 # update the known hosts file
2575 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2576 node_list = self.cfg.GetNodeList()
2578 node_list.remove(master)
2581 result = self.rpc.call_upload_file(node_list,
2582 constants.SSH_KNOWN_HOSTS_FILE)
2583 for to_node, to_result in result.iteritems():
2584 msg = to_result.fail_msg
2586 msg = ("Copy of file %s to node %s failed: %s" %
2587 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2588 self.proc.LogWarning(msg)
2591 result = self.rpc.call_node_start_master(master, False, False)
2592 msg = result.fail_msg
2594 self.LogWarning("Could not re-enable the master role on"
2595 " the master, please restart manually: %s", msg)
2600 class LUSetClusterParams(LogicalUnit):
2601 """Change the parameters of the cluster.
2604 HPATH = "cluster-modify"
2605 HTYPE = constants.HTYPE_CLUSTER
2607 ("vg_name", None, _TMaybeString),
2608 ("enabled_hypervisors", None,
2609 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2610 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2611 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2612 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2613 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2614 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2615 ("uid_pool", None, _NoType),
2616 ("add_uids", None, _NoType),
2617 ("remove_uids", None, _NoType),
2618 ("maintain_node_health", None, _TMaybeBool),
2619 ("nicparams", None, _TOr(_TDict, _TNone)),
2620 ("drbd_helper", None, _TOr(_TString, _TNone)),
2621 ("default_iallocator", None, _TMaybeString),
2622 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2623 ("hidden_os", None, _TOr(_TListOf(\
2626 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2628 ("blacklisted_os", None, _TOr(_TListOf(\
2631 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2636 def CheckArguments(self):
2640 if self.op.uid_pool:
2641 uidpool.CheckUidPool(self.op.uid_pool)
2643 if self.op.add_uids:
2644 uidpool.CheckUidPool(self.op.add_uids)
2646 if self.op.remove_uids:
2647 uidpool.CheckUidPool(self.op.remove_uids)
2649 def ExpandNames(self):
2650 # FIXME: in the future maybe other cluster params won't require checking on
2651 # all nodes to be modified.
2652 self.needed_locks = {
2653 locking.LEVEL_NODE: locking.ALL_SET,
2655 self.share_locks[locking.LEVEL_NODE] = 1
2657 def BuildHooksEnv(self):
2662 "OP_TARGET": self.cfg.GetClusterName(),
2663 "NEW_VG_NAME": self.op.vg_name,
2665 mn = self.cfg.GetMasterNode()
2666 return env, [mn], [mn]
2668 def CheckPrereq(self):
2669 """Check prerequisites.
    This checks that the given parameters don't conflict and
    that the given volume group is valid.
2675 if self.op.vg_name is not None and not self.op.vg_name:
2676 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2677 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2678 " instances exist", errors.ECODE_INVAL)
2680 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2681 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2682 raise errors.OpPrereqError("Cannot disable drbd helper while"
2683 " drbd-based instances exist",
2686 node_list = self.acquired_locks[locking.LEVEL_NODE]
    # if vg_name is not None, check the given volume group on all nodes
2690 vglist = self.rpc.call_vg_list(node_list)
2691 for node in node_list:
2692 msg = vglist[node].fail_msg
2694 # ignoring down node
2695 self.LogWarning("Error while gathering data on node %s"
2696 " (ignoring node): %s", node, msg)
2698 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2700 constants.MIN_VG_SIZE)
2702 raise errors.OpPrereqError("Error on node '%s': %s" %
2703 (node, vgstatus), errors.ECODE_ENVIRON)
2705 if self.op.drbd_helper:
2706 # checks given drbd helper on all nodes
2707 helpers = self.rpc.call_drbd_helper(node_list)
2708 for node in node_list:
2709 ninfo = self.cfg.GetNodeInfo(node)
2711 self.LogInfo("Not checking drbd helper on offline node %s", node)
2713 msg = helpers[node].fail_msg
2715 raise errors.OpPrereqError("Error checking drbd helper on node"
2716 " '%s': %s" % (node, msg),
2717 errors.ECODE_ENVIRON)
2718 node_helper = helpers[node].payload
2719 if node_helper != self.op.drbd_helper:
2720 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2721 (node, node_helper), errors.ECODE_ENVIRON)
2723 self.cluster = cluster = self.cfg.GetClusterInfo()
2724 # validate params changes
2725 if self.op.beparams:
2726 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2727 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2729 if self.op.nicparams:
2730 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2731 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2732 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2735 # check all instances for consistency
2736 for instance in self.cfg.GetAllInstancesInfo().values():
2737 for nic_idx, nic in enumerate(instance.nics):
2738 params_copy = copy.deepcopy(nic.nicparams)
2739 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2741 # check parameter syntax
2743 objects.NIC.CheckParameterSyntax(params_filled)
2744 except errors.ConfigurationError, err:
2745 nic_errors.append("Instance %s, nic/%d: %s" %
2746 (instance.name, nic_idx, err))
2748 # if we're moving instances to routed, check that they have an ip
2749 target_mode = params_filled[constants.NIC_MODE]
2750 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
          nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2752 (instance.name, nic_idx))
2754 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2755 "\n".join(nic_errors))
2757 # hypervisor list/parameters
2758 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2759 if self.op.hvparams:
2760 for hv_name, hv_dict in self.op.hvparams.items():
2761 if hv_name not in self.new_hvparams:
2762 self.new_hvparams[hv_name] = hv_dict
2764 self.new_hvparams[hv_name].update(hv_dict)
2766 # os hypervisor parameters
2767 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2769 for os_name, hvs in self.op.os_hvp.items():
2770 if os_name not in self.new_os_hvp:
2771 self.new_os_hvp[os_name] = hvs
2773 for hv_name, hv_dict in hvs.items():
2774 if hv_name not in self.new_os_hvp[os_name]:
2775 self.new_os_hvp[os_name][hv_name] = hv_dict
2777 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2780 self.new_osp = objects.FillDict(cluster.osparams, {})
2781 if self.op.osparams:
2782 for os_name, osp in self.op.osparams.items():
2783 if os_name not in self.new_osp:
2784 self.new_osp[os_name] = {}
2786 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2789 if not self.new_osp[os_name]:
2790 # we removed all parameters
2791 del self.new_osp[os_name]
2793 # check the parameter validity (remote check)
2794 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2795 os_name, self.new_osp[os_name])
2797 # changes to the hypervisor list
2798 if self.op.enabled_hypervisors is not None:
2799 self.hv_list = self.op.enabled_hypervisors
2800 for hv in self.hv_list:
2801 # if the hypervisor doesn't already exist in the cluster
2802 # hvparams, we initialize it to empty, and then (in both
2803 # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
2806 if hv not in new_hvp:
2808 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2809 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2811 self.hv_list = cluster.enabled_hypervisors
2813 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2814 # either the enabled list has changed, or the parameters have, validate
2815 for hv_name, hv_params in self.new_hvparams.items():
2816 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2817 (self.op.enabled_hypervisors and
2818 hv_name in self.op.enabled_hypervisors)):
2819 # either this is a new hypervisor, or its parameters have changed
2820 hv_class = hypervisor.GetHypervisor(hv_name)
2821 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2822 hv_class.CheckParameterSyntax(hv_params)
2823 _CheckHVParams(self, node_list, hv_name, hv_params)
2826 # no need to check any newly-enabled hypervisors, since the
2827 # defaults have already been checked in the above code-block
2828 for os_name, os_hvp in self.new_os_hvp.items():
2829 for hv_name, hv_params in os_hvp.items():
2830 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2831 # we need to fill in the new os_hvp on top of the actual hv_p
2832 cluster_defaults = self.new_hvparams.get(hv_name, {})
2833 new_osp = objects.FillDict(cluster_defaults, hv_params)
2834 hv_class = hypervisor.GetHypervisor(hv_name)
2835 hv_class.CheckParameterSyntax(new_osp)
2836 _CheckHVParams(self, node_list, hv_name, new_osp)
2838 if self.op.default_iallocator:
2839 alloc_script = utils.FindFile(self.op.default_iallocator,
2840 constants.IALLOCATOR_SEARCH_PATH,
2842 if alloc_script is None:
2843 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2844 " specified" % self.op.default_iallocator,
2847 def Exec(self, feedback_fn):
2848 """Change the parameters of the cluster.
2851 if self.op.vg_name is not None:
2852 new_volume = self.op.vg_name
2855 if new_volume != self.cfg.GetVGName():
2856 self.cfg.SetVGName(new_volume)
2858 feedback_fn("Cluster LVM configuration already in desired"
2859 " state, not changing")
2860 if self.op.drbd_helper is not None:
2861 new_helper = self.op.drbd_helper
2864 if new_helper != self.cfg.GetDRBDHelper():
2865 self.cfg.SetDRBDHelper(new_helper)
2867 feedback_fn("Cluster DRBD helper already in desired state,"
2869 if self.op.hvparams:
2870 self.cluster.hvparams = self.new_hvparams
2872 self.cluster.os_hvp = self.new_os_hvp
2873 if self.op.enabled_hypervisors is not None:
2874 self.cluster.hvparams = self.new_hvparams
2875 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2876 if self.op.beparams:
2877 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2878 if self.op.nicparams:
2879 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2880 if self.op.osparams:
2881 self.cluster.osparams = self.new_osp
2883 if self.op.candidate_pool_size is not None:
2884 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2885 # we need to update the pool size here, otherwise the save will fail
2886 _AdjustCandidatePool(self, [])
2888 if self.op.maintain_node_health is not None:
2889 self.cluster.maintain_node_health = self.op.maintain_node_health
2891 if self.op.add_uids is not None:
2892 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2894 if self.op.remove_uids is not None:
2895 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2897 if self.op.uid_pool is not None:
2898 self.cluster.uid_pool = self.op.uid_pool
2900 if self.op.default_iallocator is not None:
2901 self.cluster.default_iallocator = self.op.default_iallocator
2903 if self.op.reserved_lvs is not None:
2904 self.cluster.reserved_lvs = self.op.reserved_lvs
2906 def helper_os(aname, mods, desc):
2908 lst = getattr(self.cluster, aname)
2909 for key, val in mods:
2910 if key == constants.DDM_ADD:
2912 feedback_fn("OS %s already in %s, ignoring", val, desc)
2915 elif key == constants.DDM_REMOVE:
2919 feedback_fn("OS %s not found in %s, ignoring", val, desc)
2921 raise errors.ProgrammerError("Invalid modification '%s'" % key)
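    # e.g. (hypothetical values) self.op.hidden_os =
    # [(constants.DDM_ADD, "lenny-image")] would add "lenny-image" to the
    # hidden list unless it is already present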
2923 if self.op.hidden_os:
2924 helper_os("hidden_os", self.op.hidden_os, "hidden")
2926 if self.op.blacklisted_os:
2927 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2929 self.cfg.Update(self.cluster, feedback_fn)
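    # Update saves the new cluster object; ConfigWriter takes care of
    # distributing the config and ssconf files to all nodes, while the
    # remaining ancillary files are handled by _RedistributeAncillaryFiles
    # below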
2932 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2933 """Distribute additional files which are part of the cluster configuration.
2935 ConfigWriter takes care of distributing the config and ssconf files, but
2936 there are more files which should be distributed to all nodes. This function
2937 makes sure those are copied.
2939 @param lu: calling logical unit
2940 @param additional_nodes: list of nodes not in the config to distribute to
2943 # 1. Gather target nodes
2944 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2945 dist_nodes = lu.cfg.GetOnlineNodeList()
2946 if additional_nodes is not None:
2947 dist_nodes.extend(additional_nodes)
2948 if myself.name in dist_nodes:
2949 dist_nodes.remove(myself.name)
2951 # 2. Gather files to distribute
2952 dist_files = set([constants.ETC_HOSTS,
2953 constants.SSH_KNOWN_HOSTS_FILE,
2954 constants.RAPI_CERT_FILE,
2955 constants.RAPI_USERS_FILE,
2956 constants.CONFD_HMAC_KEY,
2957 constants.CLUSTER_DOMAIN_SECRET_FILE,
2960 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2961 for hv_name in enabled_hypervisors:
2962 hv_class = hypervisor.GetHypervisor(hv_name)
2963 dist_files.update(hv_class.GetAncillaryFiles())
2965 # 3. Perform the files upload
2966 for fname in dist_files:
2967 if os.path.exists(fname):
2968 result = lu.rpc.call_upload_file(dist_nodes, fname)
2969 for to_node, to_result in result.items():
2970 msg = to_result.fail_msg
2972 msg = ("Copy of file %s to node %s failed: %s" %
2973 (fname, to_node, msg))
2974 lu.proc.LogWarning(msg)
2977 class LURedistributeConfig(NoHooksLU):
2978 """Force the redistribution of cluster configuration.
2980 This is a very simple LU.
2985 def ExpandNames(self):
2986 self.needed_locks = {
2987 locking.LEVEL_NODE: locking.ALL_SET,
2989 self.share_locks[locking.LEVEL_NODE] = 1
2991 def Exec(self, feedback_fn):
2992 """Redistribute the configuration.
2995 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2996 _RedistributeAncillaryFiles(self)
2999 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3000 """Sleep and poll for an instance's disk to sync.
  if not instance.disks or (disks is not None and not disks):
3006 disks = _ExpandCheckDisks(instance, disks)
3009 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3011 node = instance.primary_node
3014 lu.cfg.SetDiskID(dev, node)
3016 # TODO: Convert to utils.Retry
3019 degr_retries = 10 # in seconds, as we sleep 1 second each time
3023 cumul_degraded = False
3024 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3025 msg = rstats.fail_msg
3027 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3030 raise errors.RemoteError("Can't contact node %s for mirror data,"
3031 " aborting." % node)
3034 rstats = rstats.payload
3036 for i, mstat in enumerate(rstats):
3038 lu.LogWarning("Can't compute data for node %s/%s",
3039 node, disks[i].iv_name)
3042 cumul_degraded = (cumul_degraded or
3043 (mstat.is_degraded and mstat.sync_percent is None))
3044 if mstat.sync_percent is not None:
3046 if mstat.estimated_time is not None:
3047 rem_time = ("%s remaining (estimated)" %
3048 utils.FormatSeconds(mstat.estimated_time))
3049 max_time = mstat.estimated_time
3051 rem_time = "no time estimate"
3052 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3053 (disks[i].iv_name, mstat.sync_percent, rem_time))
3055 # if we're done but degraded, let's do a few small retries, to
3056 # make sure we see a stable and not transient situation; therefore
    # we force a restart of the loop
3058 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3059 logging.info("Degraded disks found, %d retries left", degr_retries)
3067 time.sleep(min(60, max_time))
3070 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
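  # the return value is True when no disk was left in a degraded state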
3071 return not cumul_degraded
3074 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3075 """Check that mirrors are not degraded.
3077 The ldisk parameter, if True, will change the test from the
3078 is_degraded attribute (which represents overall non-ok status for
3079 the device(s)) to the ldisk (representing the local storage status).
3082 lu.cfg.SetDiskID(dev, node)
3086 if on_primary or dev.AssembleOnSecondary():
3087 rstats = lu.rpc.call_blockdev_find(node, dev)
3088 msg = rstats.fail_msg
3090 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3092 elif not rstats.payload:
3093 lu.LogWarning("Can't find disk on node %s", node)
3097 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3099 result = result and not rstats.payload.is_degraded
3102 for child in dev.children:
3103 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
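      # note: the stricter ldisk check is applied only to the top-level
      # device; children are checked for overall degradation only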
3108 class LUDiagnoseOS(NoHooksLU):
3109 """Logical unit for OS diagnose/query.
3114 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3118 _BLK = "blacklisted"
3120 _FIELDS_STATIC = utils.FieldSet()
3121 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3122 "parameters", "api_versions", _HID, _BLK)
3124 def CheckArguments(self):
3126 raise errors.OpPrereqError("Selective OS query not supported",
3129 _CheckOutputFields(static=self._FIELDS_STATIC,
3130 dynamic=self._FIELDS_DYNAMIC,
3131 selected=self.op.output_fields)
3133 def ExpandNames(self):
3134 # Lock all nodes, in shared mode
3135 # Temporary removal of locks, should be reverted later
3136 # TODO: reintroduce locks when they are lighter-weight
3137 self.needed_locks = {}
3138 #self.share_locks[locking.LEVEL_NODE] = 1
3139 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3142 def _DiagnoseByOS(rlist):
3143 """Remaps a per-node return list into an a per-os per-node dictionary
3145 @param rlist: a map with node names as keys and OS objects as values
3148 @return: a dictionary with osnames as keys and as value another
3149 map, with nodes as keys and tuples of (path, status, diagnose,
3150 variants, parameters, api_versions) as values, eg::
3152 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3153 (/srv/..., False, "invalid api")],
3154 "node2": [(/srv/..., True, "", [], [])]}
3159 # we build here the list of nodes that didn't fail the RPC (at RPC
3160 # level), so that nodes with a non-responding node daemon don't
3161 # make all OSes invalid
3162 good_nodes = [node_name for node_name in rlist
3163 if not rlist[node_name].fail_msg]
3164 for node_name, nr in rlist.items():
3165 if nr.fail_msg or not nr.payload:
3167 for (name, path, status, diagnose, variants,
3168 params, api_versions) in nr.payload:
3169 if name not in all_os:
3170 # build a list of nodes for this os containing empty lists
3171 # for each node in node_list
3173 for nname in good_nodes:
3174 all_os[name][nname] = []
3175 # convert params from [name, help] to (name, help)
3176 params = [tuple(v) for v in params]
3177 all_os[name][node_name].append((path, status, diagnose,
3178 variants, params, api_versions))
3181 def Exec(self, feedback_fn):
3182 """Compute the list of OSes.
3185 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3186 node_data = self.rpc.call_os_diagnose(valid_nodes)
3187 pol = self._DiagnoseByOS(node_data)
3189 cluster = self.cfg.GetClusterInfo()
3191 for os_name in utils.NiceSort(pol.keys()):
3192 os_data = pol[os_name]
3195 (variants, params, api_versions) = null_state = (set(), set(), set())
3196 for idx, osl in enumerate(os_data.values()):
3197 valid = bool(valid and osl and osl[0][1])
3199 (variants, params, api_versions) = null_state
3201 node_variants, node_params, node_api = osl[0][3:6]
3202 if idx == 0: # first entry
3203 variants = set(node_variants)
3204 params = set(node_params)
3205 api_versions = set(node_api)
3206 else: # keep consistency
3207 variants.intersection_update(node_variants)
3208 params.intersection_update(node_params)
3209 api_versions.intersection_update(node_api)
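      # after this loop only the variants/parameters/API versions common
      # to every node remain, i.e. an attribute is reported only if all
      # nodes agree on it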
3211 is_hid = os_name in cluster.hidden_os
3212 is_blk = os_name in cluster.blacklisted_os
3213 if ((self._HID not in self.op.output_fields and is_hid) or
3214 (self._BLK not in self.op.output_fields and is_blk) or
3215 (self._VLD not in self.op.output_fields and not valid)):
3218 for field in self.op.output_fields:
3221 elif field == self._VLD:
3223 elif field == "node_status":
3224 # this is just a copy of the dict
3226 for node_name, nos_list in os_data.items():
3227 val[node_name] = nos_list
3228 elif field == "variants":
3229 val = utils.NiceSort(list(variants))
3230 elif field == "parameters":
3232 elif field == "api_versions":
3233 val = list(api_versions)
3234 elif field == self._HID:
3236 elif field == self._BLK:
3239 raise errors.ParameterError(field)
3246 class LURemoveNode(LogicalUnit):
3247 """Logical unit for removing a node.
3250 HPATH = "node-remove"
3251 HTYPE = constants.HTYPE_NODE
3256 def BuildHooksEnv(self):
3259 This doesn't run on the target node in the pre phase as a failed
3260 node would then be impossible to remove.
3264 "OP_TARGET": self.op.node_name,
3265 "NODE_NAME": self.op.node_name,
3267 all_nodes = self.cfg.GetNodeList()
3269 all_nodes.remove(self.op.node_name)
      logging.warning("Node %s which is about to be removed was not found"
                      " in the list of all nodes", self.op.node_name)
3273 return env, all_nodes, all_nodes
3275 def CheckPrereq(self):
3276 """Check prerequisites.
3279 - the node exists in the configuration
3280 - it does not have primary or secondary instances
3281 - it's not the master
3283 Any errors are signaled by raising errors.OpPrereqError.
3286 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3287 node = self.cfg.GetNodeInfo(self.op.node_name)
3288 assert node is not None
3290 instance_list = self.cfg.GetInstanceList()
3292 masternode = self.cfg.GetMasterNode()
3293 if node.name == masternode:
3294 raise errors.OpPrereqError("Node is the master node,"
3295 " you need to failover first.",
3298 for instance_name in instance_list:
3299 instance = self.cfg.GetInstanceInfo(instance_name)
3300 if node.name in instance.all_nodes:
3301 raise errors.OpPrereqError("Instance %s is still running on the node,"
3302 " please remove first." % instance_name,
3304 self.op.node_name = node.name
3307 def Exec(self, feedback_fn):
3308 """Removes the node from the cluster.
3312 logging.info("Stopping the node daemon and removing configs from node %s",
3315 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3317 # Promote nodes to master candidate as needed
3318 _AdjustCandidatePool(self, exceptions=[node.name])
3319 self.context.RemoveNode(node.name)
3321 # Run post hooks on the node before it's removed
3322 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3324 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3326 # pylint: disable-msg=W0702
3327 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3329 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3330 msg = result.fail_msg
3332 self.LogWarning("Errors encountered on the remote node while leaving"
3333 " the cluster: %s", msg)
3335 # Remove node from our /etc/hosts
3336 if self.cfg.GetClusterInfo().modify_etc_hosts:
3337 master_node = self.cfg.GetMasterNode()
3338 result = self.rpc.call_etc_hosts_modify(master_node,
3339 constants.ETC_HOSTS_REMOVE,
3341 result.Raise("Can't update hosts file with new host data")
3342 _RedistributeAncillaryFiles(self)
3345 class LUQueryNodes(NoHooksLU):
3346 """Logical unit for querying nodes.
3349 # pylint: disable-msg=W0142
3352 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3353 ("use_locking", False, _TBool),
3357 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3358 "master_candidate", "offline", "drained"]
3360 _FIELDS_DYNAMIC = utils.FieldSet(
3362 "mtotal", "mnode", "mfree",
3364 "ctotal", "cnodes", "csockets",
3367 _FIELDS_STATIC = utils.FieldSet(*[
3368 "pinst_cnt", "sinst_cnt",
3369 "pinst_list", "sinst_list",
3370 "pip", "sip", "tags",
3372 "role"] + _SIMPLE_FIELDS
3375 def CheckArguments(self):
3376 _CheckOutputFields(static=self._FIELDS_STATIC,
3377 dynamic=self._FIELDS_DYNAMIC,
3378 selected=self.op.output_fields)
3380 def ExpandNames(self):
3381 self.needed_locks = {}
3382 self.share_locks[locking.LEVEL_NODE] = 1
3385 self.wanted = _GetWantedNodes(self, self.op.names)
3387 self.wanted = locking.ALL_SET
3389 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3390 self.do_locking = self.do_node_query and self.op.use_locking
3392 # if we don't request only static fields, we need to lock the nodes
3393 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3395 def Exec(self, feedback_fn):
3396 """Computes the list of nodes and their attributes.
3399 all_info = self.cfg.GetAllNodesInfo()
3401 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3402 elif self.wanted != locking.ALL_SET:
3403 nodenames = self.wanted
3404 missing = set(nodenames).difference(all_info.keys())
3406 raise errors.OpExecError(
3407 "Some nodes were removed before retrieving their data: %s" % missing)
3409 nodenames = all_info.keys()
3411 nodenames = utils.NiceSort(nodenames)
3412 nodelist = [all_info[name] for name in nodenames]
3414 # begin data gathering
3416 if self.do_node_query:
3418 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3419 self.cfg.GetHypervisorType())
3420 for name in nodenames:
3421 nodeinfo = node_data[name]
3422 if not nodeinfo.fail_msg and nodeinfo.payload:
3423 nodeinfo = nodeinfo.payload
3424 fn = utils.TryConvert
3426 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3427 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3428 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3429 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3430 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3431 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3432 "bootid": nodeinfo.get('bootid', None),
3433 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3434 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3437 live_data[name] = {}
3439 live_data = dict.fromkeys(nodenames, {})
3441 node_to_primary = dict([(name, set()) for name in nodenames])
3442 node_to_secondary = dict([(name, set()) for name in nodenames])
3444 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3445 "sinst_cnt", "sinst_list"))
3446 if inst_fields & frozenset(self.op.output_fields):
3447 inst_data = self.cfg.GetAllInstancesInfo()
3449 for inst in inst_data.values():
3450 if inst.primary_node in node_to_primary:
3451 node_to_primary[inst.primary_node].add(inst.name)
3452 for secnode in inst.secondary_nodes:
3453 if secnode in node_to_secondary:
3454 node_to_secondary[secnode].add(inst.name)
3456 master_node = self.cfg.GetMasterNode()
3458 # end data gathering
3461 for node in nodelist:
3463 for field in self.op.output_fields:
3464 if field in self._SIMPLE_FIELDS:
3465 val = getattr(node, field)
3466 elif field == "pinst_list":
3467 val = list(node_to_primary[node.name])
3468 elif field == "sinst_list":
3469 val = list(node_to_secondary[node.name])
3470 elif field == "pinst_cnt":
3471 val = len(node_to_primary[node.name])
3472 elif field == "sinst_cnt":
3473 val = len(node_to_secondary[node.name])
3474 elif field == "pip":
3475 val = node.primary_ip
3476 elif field == "sip":
3477 val = node.secondary_ip
3478 elif field == "tags":
3479 val = list(node.GetTags())
3480 elif field == "master":
3481 val = node.name == master_node
3482 elif self._FIELDS_DYNAMIC.Matches(field):
3483 val = live_data[node.name].get(field, None)
3484 elif field == "role":
3485 if node.name == master_node:
3487 elif node.master_candidate:
3496 raise errors.ParameterError(field)
3497 node_output.append(val)
3498 output.append(node_output)
3503 class LUQueryNodeVolumes(NoHooksLU):
3504 """Logical unit for getting volumes on node(s).
3508 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3509 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3512 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3513 _FIELDS_STATIC = utils.FieldSet("node")
3515 def CheckArguments(self):
3516 _CheckOutputFields(static=self._FIELDS_STATIC,
3517 dynamic=self._FIELDS_DYNAMIC,
3518 selected=self.op.output_fields)
3520 def ExpandNames(self):
3521 self.needed_locks = {}
3522 self.share_locks[locking.LEVEL_NODE] = 1
3523 if not self.op.nodes:
3524 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3526 self.needed_locks[locking.LEVEL_NODE] = \
3527 _GetWantedNodes(self, self.op.nodes)
3529 def Exec(self, feedback_fn):
3530 """Computes the list of nodes and their attributes.
3533 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3534 volumes = self.rpc.call_node_volumes(nodenames)
3536 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3537 in self.cfg.GetInstanceList()]
3539 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3542 for node in nodenames:
3543 nresult = volumes[node]
3546 msg = nresult.fail_msg
3548 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3551 node_vols = nresult.payload[:]
3552 node_vols.sort(key=lambda vol: vol['dev'])
3554 for vol in node_vols:
3556 for field in self.op.output_fields:
3559 elif field == "phys":
3563 elif field == "name":
3565 elif field == "size":
3566 val = int(float(vol['size']))
3567 elif field == "instance":
3569 if node not in lv_by_node[inst]:
3571 if vol['name'] in lv_by_node[inst][node]:
3577 raise errors.ParameterError(field)
3578 node_output.append(str(val))
3580 output.append(node_output)
3585 class LUQueryNodeStorage(NoHooksLU):
3586 """Logical unit for getting information on storage units on node(s).
3589 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3591 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3592 ("storage_type", _NoDefault, _CheckStorageType),
3593 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3594 ("name", None, _TMaybeString),
3598 def CheckArguments(self):
3599 _CheckOutputFields(static=self._FIELDS_STATIC,
3600 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3601 selected=self.op.output_fields)
3603 def ExpandNames(self):
3604 self.needed_locks = {}
3605 self.share_locks[locking.LEVEL_NODE] = 1
3608 self.needed_locks[locking.LEVEL_NODE] = \
3609 _GetWantedNodes(self, self.op.nodes)
3611 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3613 def Exec(self, feedback_fn):
3614 """Computes the list of nodes and their attributes.
3617 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3619 # Always get name to sort by
3620 if constants.SF_NAME in self.op.output_fields:
3621 fields = self.op.output_fields[:]
3623 fields = [constants.SF_NAME] + self.op.output_fields
3625 # Never ask for node or type as it's only known to the LU
3626 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3627 while extra in fields:
3628 fields.remove(extra)
3630 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3631 name_idx = field_idx[constants.SF_NAME]
3633 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3634 data = self.rpc.call_storage_list(self.nodes,
3635 self.op.storage_type, st_args,
3636 self.op.name, fields)
3640 for node in utils.NiceSort(self.nodes):
3641 nresult = data[node]
3645 msg = nresult.fail_msg
3647 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3650 rows = dict([(row[name_idx], row) for row in nresult.payload])
3652 for name in utils.NiceSort(rows.keys()):
3657 for field in self.op.output_fields:
3658 if field == constants.SF_NODE:
3660 elif field == constants.SF_TYPE:
3661 val = self.op.storage_type
3662 elif field in field_idx:
3663 val = row[field_idx[field]]
3665 raise errors.ParameterError(field)
3674 class LUModifyNodeStorage(NoHooksLU):
3675 """Logical unit for modifying a storage volume on a node.
3680 ("storage_type", _NoDefault, _CheckStorageType),
3681 ("name", _NoDefault, _TNonEmptyString),
3682 ("changes", _NoDefault, _TDict),
3686 def CheckArguments(self):
3687 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3689 storage_type = self.op.storage_type
3692 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3694 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3695 " modified" % storage_type,
3698 diff = set(self.op.changes.keys()) - modifiable
3700 raise errors.OpPrereqError("The following fields can not be modified for"
3701 " storage units of type '%s': %r" %
3702 (storage_type, list(diff)),
3705 def ExpandNames(self):
3706 self.needed_locks = {
3707 locking.LEVEL_NODE: self.op.node_name,
3710 def Exec(self, feedback_fn):
3711 """Computes the list of nodes and their attributes.
3714 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3715 result = self.rpc.call_storage_modify(self.op.node_name,
3716 self.op.storage_type, st_args,
3717 self.op.name, self.op.changes)
3718 result.Raise("Failed to modify storage unit '%s' on %s" %
3719 (self.op.name, self.op.node_name))
3722 class LUAddNode(LogicalUnit):
3723 """Logical unit for adding node to the cluster.
3727 HTYPE = constants.HTYPE_NODE
3730 ("primary_ip", None, _NoType),
3731 ("secondary_ip", None, _TMaybeString),
3732 ("readd", False, _TBool),
3733 ("nodegroup", None, _TMaybeString)
3736 def CheckArguments(self):
3737 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3738 # validate/normalize the node name
3739 self.hostname = netutils.GetHostname(name=self.op.node_name,
3740 family=self.primary_ip_family)
3741 self.op.node_name = self.hostname.name
3742 if self.op.readd and self.op.nodegroup:
3743 raise errors.OpPrereqError("Cannot pass a nodegroup when a node is"
3744 " being readded", errors.ECODE_INVAL)
3746 def BuildHooksEnv(self):
3749 This will run on all nodes before, and on all nodes + the new node after.
3753 "OP_TARGET": self.op.node_name,
3754 "NODE_NAME": self.op.node_name,
3755 "NODE_PIP": self.op.primary_ip,
3756 "NODE_SIP": self.op.secondary_ip,
3758 nodes_0 = self.cfg.GetNodeList()
3759 nodes_1 = nodes_0 + [self.op.node_name, ]
3760 return env, nodes_0, nodes_1
3762 def CheckPrereq(self):
3763 """Check prerequisites.
3766 - the new node is not already in the config
3768 - its parameters (single/dual homed) matches the cluster
3770 Any errors are signaled by raising errors.OpPrereqError.
3774 hostname = self.hostname
3775 node = hostname.name
3776 primary_ip = self.op.primary_ip = hostname.ip
3777 if self.op.secondary_ip is None:
3778 if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
3780 " IPv4 address must be given as secondary",
3782 self.op.secondary_ip = primary_ip
3784 secondary_ip = self.op.secondary_ip
3785 if not netutils.IP4Address.IsValid(secondary_ip):
3786 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3787 " address" % secondary_ip, errors.ECODE_INVAL)
3789 node_list = cfg.GetNodeList()
3790 if not self.op.readd and node in node_list:
3791 raise errors.OpPrereqError("Node %s is already in the configuration" %
3792 node, errors.ECODE_EXISTS)
3793 elif self.op.readd and node not in node_list:
3794 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3797 self.changed_primary_ip = False
3799 for existing_node_name in node_list:
3800 existing_node = cfg.GetNodeInfo(existing_node_name)
3802 if self.op.readd and node == existing_node_name:
3803 if existing_node.secondary_ip != secondary_ip:
3804 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3805 " address configuration as before",
3807 if existing_node.primary_ip != primary_ip:
3808 self.changed_primary_ip = True
3812 if (existing_node.primary_ip == primary_ip or
3813 existing_node.secondary_ip == primary_ip or
3814 existing_node.primary_ip == secondary_ip or
3815 existing_node.secondary_ip == secondary_ip):
3816 raise errors.OpPrereqError("New node ip address(es) conflict with"
3817 " existing node %s" % existing_node.name,
3818 errors.ECODE_NOTUNIQUE)
3820 # check that the type of the node (single versus dual homed) is the
3821 # same as for the master
3822 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3823 master_singlehomed = myself.secondary_ip == myself.primary_ip
3824 newbie_singlehomed = secondary_ip == primary_ip
3825 if master_singlehomed != newbie_singlehomed:
3826 if master_singlehomed:
3827 raise errors.OpPrereqError("The master has no private ip but the"
3828 " new node has one",
3831 raise errors.OpPrereqError("The master has a private ip but the"
3832 " new node doesn't have one",
3835 # checks reachability
3836 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3837 raise errors.OpPrereqError("Node not reachable by ping",
3838 errors.ECODE_ENVIRON)
3840 if not newbie_singlehomed:
3841 # check reachability from my secondary ip to newbie's secondary ip
3842 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3843 source=myself.secondary_ip):
3844 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3845 " based ping to noded port",
3846 errors.ECODE_ENVIRON)
3853 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3856 self.new_node = self.cfg.GetNodeInfo(node)
3857 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3859 nodegroup = cfg.LookupNodeGroup(self.op.nodegroup)
3860 self.new_node = objects.Node(name=node,
3861 primary_ip=primary_ip,
3862 secondary_ip=secondary_ip,
3863 master_candidate=self.master_candidate,
3864 offline=False, drained=False,
3865 nodegroup=nodegroup)
3867 def Exec(self, feedback_fn):
3868 """Adds the new node to the cluster.
3871 new_node = self.new_node
3872 node = new_node.name
3874 # for re-adds, reset the offline/drained/master-candidate flags;
3875 # we need to reset here, otherwise offline would prevent RPC calls
3876 # later in the procedure; this also means that if the re-add
3877 # fails, we are left with a non-offlined, broken node
3879 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3880 self.LogInfo("Readding a node, the offline/drained flags were reset")
3881 # if we demote the node, we do cleanup later in the procedure
3882 new_node.master_candidate = self.master_candidate
3883 if self.changed_primary_ip:
3884 new_node.primary_ip = self.op.primary_ip
3886 # notify the user about any possible mc promotion
3887 if new_node.master_candidate:
3888 self.LogInfo("Node will be a master candidate")
3890 # check connectivity
3891 result = self.rpc.call_version([node])[node]
3892 result.Raise("Can't get version information from node %s" % node)
3893 if constants.PROTOCOL_VERSION == result.payload:
3894 logging.info("Communication to node %s fine, sw version %s match",
3895 node, result.payload)
3896 else:
3897 raise errors.OpExecError("Version mismatch master version %s,"
3898 " node version %s" %
3899 (constants.PROTOCOL_VERSION, result.payload))
3901 # Add node to our /etc/hosts, and add key to known_hosts
3902 if self.cfg.GetClusterInfo().modify_etc_hosts:
3903 master_node = self.cfg.GetMasterNode()
3904 result = self.rpc.call_etc_hosts_modify(master_node,
3905 constants.ETC_HOSTS_ADD,
3908 result.Raise("Can't update hosts file with new host data")
3910 if new_node.secondary_ip != new_node.primary_ip:
3911 result = self.rpc.call_node_has_ip_address(new_node.name,
3912 new_node.secondary_ip)
3913 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3914 prereq=True, ecode=errors.ECODE_ENVIRON)
3915 if not result.payload:
3916 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3917 " you gave (%s). Please fix and re-run this"
3918 " command." % new_node.secondary_ip)
3920 node_verify_list = [self.cfg.GetMasterNode()]
3921 node_verify_param = {
3922 constants.NV_NODELIST: [node],
3923 # TODO: do a node-net-test as well?
3924 }
3926 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3927 self.cfg.GetClusterName())
3928 for verifier in node_verify_list:
3929 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3930 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3931 if nl_payload:
3932 for failed in nl_payload:
3933 feedback_fn("ssh/hostname verification failed"
3934 " (checking from %s): %s" %
3935 (verifier, nl_payload[failed]))
3936 raise errors.OpExecError("ssh/hostname verification failed.")
3938 if self.op.readd:
3939 _RedistributeAncillaryFiles(self)
3940 self.context.ReaddNode(new_node)
3941 # make sure we redistribute the config
3942 self.cfg.Update(new_node, feedback_fn)
3943 # and make sure the new node will not have old files around
3944 if not new_node.master_candidate:
3945 result = self.rpc.call_node_demote_from_mc(new_node.name)
3946 msg = result.fail_msg
3947 if msg:
3948 self.LogWarning("Node failed to demote itself from master"
3949 " candidate status: %s" % msg)
3950 else:
3951 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3952 self.context.AddNode(new_node, self.proc.GetECId())
3955 class LUSetNodeParams(LogicalUnit):
3956 """Modifies the parameters of a node.
3958 """
3959 HPATH = "node-modify"
3960 HTYPE = constants.HTYPE_NODE
3961 _OP_PARAMS = [
3962 _PNodeName,
3963 ("master_candidate", None, _TMaybeBool),
3964 ("offline", None, _TMaybeBool),
3965 ("drained", None, _TMaybeBool),
3966 ("auto_promote", False, _TBool),
3967 ]
3968 REQ_BGL = False
3971 def CheckArguments(self):
3972 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3973 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3974 if all_mods.count(None) == 3:
3975 raise errors.OpPrereqError("Please pass at least one modification",
3976 errors.ECODE_INVAL)
3977 if all_mods.count(True) > 1:
3978 raise errors.OpPrereqError("Can't set the node into more than one"
3979 " state at the same time",
3980 errors.ECODE_INVAL)
3982 # Boolean value that tells us whether we're offlining or draining the node
3983 self.offline_or_drain = (self.op.offline == True or
3984 self.op.drained == True)
3985 self.deoffline_or_drain = (self.op.offline == False or
3986 self.op.drained == False)
3987 self.might_demote = (self.op.master_candidate == False or
3988 self.offline_or_drain)
3990 self.lock_all = self.op.auto_promote and self.might_demote
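# Editor's note: a hedged truth-table sketch for the flags computed in
# CheckArguments above (hypothetical opcode inputs, not part of the module):
#
#   op.offline  op.drained  op.master_candidate  offline_or_drain  might_demote
#   True        None        None                 True              True
#   None        False       None                 False             False
#   None        None        False                False             True
#
# lock_all is only set when a demotion is possible and auto_promote was
# requested, since promoting a replacement may touch any other node.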
3993 def ExpandNames(self):
3994 if self.lock_all:
3995 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3996 else:
3997 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3999 def BuildHooksEnv(self):
4000 """Build hooks env.
4002 This runs on the master node.
4004 """
4005 env = {
4006 "OP_TARGET": self.op.node_name,
4007 "MASTER_CANDIDATE": str(self.op.master_candidate),
4008 "OFFLINE": str(self.op.offline),
4009 "DRAINED": str(self.op.drained),
4010 }
4011 nl = [self.cfg.GetMasterNode(),
4012 self.op.node_name]
4013 return env, nl, nl
4015 def CheckPrereq(self):
4016 """Check prerequisites.
4018 This only checks the instance list against the existing names.
4021 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4023 if (self.op.master_candidate is not None or
4024 self.op.drained is not None or
4025 self.op.offline is not None):
4026 # we can't change the master's node flags
4027 if self.op.node_name == self.cfg.GetMasterNode():
4028 raise errors.OpPrereqError("The master role can be changed"
4029 " only via master-failover",
4030 errors.ECODE_INVAL)
4033 if node.master_candidate and self.might_demote and not self.lock_all:
4034 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4035 # check if after removing the current node, we're missing master
4037 (mc_remaining, mc_should, _) = \
4038 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4039 if mc_remaining < mc_should:
4040 raise errors.OpPrereqError("Not enough master candidates, please"
4041 " pass auto_promote to allow promotion",
4044 if (self.op.master_candidate == True and
4045 ((node.offline and not self.op.offline == False) or
4046 (node.drained and not self.op.drained == False))):
4047 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4048 " to master_candidate" % node.name,
4051 # If we're being deofflined/drained, we'll MC ourself if needed
4052 if (self.deoffline_or_drain and not self.offline_or_drain and not
4053 self.op.master_candidate == True and not node.master_candidate):
4054 self.op.master_candidate = _DecideSelfPromotion(self)
4055 if self.op.master_candidate:
4056 self.LogInfo("Autopromoting node to master candidate")
4060 def Exec(self, feedback_fn):
4061 """Modifies a node.
4063 """
4064 node = self.node
4066 result = []
4069 if self.op.offline is not None:
4070 node.offline = self.op.offline
4071 result.append(("offline", str(self.op.offline)))
4072 if self.op.offline == True:
4073 if node.master_candidate:
4074 node.master_candidate = False
4076 result.append(("master_candidate", "auto-demotion due to offline"))
4077 if node.drained:
4078 node.drained = False
4079 result.append(("drained", "clear drained status due to offline"))
4081 if self.op.master_candidate is not None:
4082 node.master_candidate = self.op.master_candidate
4084 result.append(("master_candidate", str(self.op.master_candidate)))
4085 if self.op.master_candidate == False:
4086 rrc = self.rpc.call_node_demote_from_mc(node.name)
4087 msg = rrc.fail_msg
4088 if msg:
4089 self.LogWarning("Node failed to demote itself: %s" % msg)
4091 if self.op.drained is not None:
4092 node.drained = self.op.drained
4093 result.append(("drained", str(self.op.drained)))
4094 if self.op.drained == True:
4095 if node.master_candidate:
4096 node.master_candidate = False
4098 result.append(("master_candidate", "auto-demotion due to drain"))
4099 rrc = self.rpc.call_node_demote_from_mc(node.name)
4100 msg = rrc.fail_msg
4101 if msg:
4102 self.LogWarning("Node failed to demote itself: %s" % msg)
4103 if node.offline:
4104 node.offline = False
4105 result.append(("offline", "clear offline status due to drain"))
4107 # we locked all nodes, we adjust the CP before updating this node
4108 if self.lock_all:
4109 _AdjustCandidatePool(self, [node.name])
4111 # this will trigger configuration file update, if needed
4112 self.cfg.Update(node, feedback_fn)
4114 # this will trigger job queue propagation or cleanup
4116 self.context.ReaddNode(node)
4118 return result
4121 class LUPowercycleNode(NoHooksLU):
4122 """Powercycles a node.
4124 """
4125 _OP_PARAMS = [
4126 _PNodeName,
4127 ("force", False, _TBool),
4128 ]
4129 REQ_BGL = False
4131 def CheckArguments(self):
4132 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4133 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4134 raise errors.OpPrereqError("The node is the master and the force"
4135 " parameter was not set",
4136 errors.ECODE_INVAL)
4138 def ExpandNames(self):
4139 """Locking for PowercycleNode.
4141 This is a last-resort option and shouldn't block on other
4142 jobs. Therefore, we grab no locks.
4145 self.needed_locks = {}
4147 def Exec(self, feedback_fn):
4151 result = self.rpc.call_node_powercycle(self.op.node_name,
4152 self.cfg.GetHypervisorType())
4153 result.Raise("Failed to schedule the reboot")
4154 return result.payload
4157 class LUQueryClusterInfo(NoHooksLU):
4158 """Query cluster configuration.
4160 """
4161 REQ_BGL = False
4163 def ExpandNames(self):
4164 self.needed_locks = {}
4166 def Exec(self, feedback_fn):
4167 """Return cluster config.
4170 cluster = self.cfg.GetClusterInfo()
4172 os_hvp = {}
4173 # Filter just for enabled hypervisors
4174 for os_name, hv_dict in cluster.os_hvp.items():
4175 os_hvp[os_name] = {}
4176 for hv_name, hv_params in hv_dict.items():
4177 if hv_name in cluster.enabled_hypervisors:
4178 os_hvp[os_name][hv_name] = hv_params
4180 # Convert ip_family to ip_version
4181 primary_ip_version = constants.IP4_VERSION
4182 if cluster.primary_ip_family == netutils.IP6Address.family:
4183 primary_ip_version = constants.IP6_VERSION
4185 result = {
4186 "software_version": constants.RELEASE_VERSION,
4187 "protocol_version": constants.PROTOCOL_VERSION,
4188 "config_version": constants.CONFIG_VERSION,
4189 "os_api_version": max(constants.OS_API_VERSIONS),
4190 "export_version": constants.EXPORT_VERSION,
4191 "architecture": (platform.architecture()[0], platform.machine()),
4192 "name": cluster.cluster_name,
4193 "master": cluster.master_node,
4194 "default_hypervisor": cluster.enabled_hypervisors[0],
4195 "enabled_hypervisors": cluster.enabled_hypervisors,
4196 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4197 for hypervisor_name in cluster.enabled_hypervisors]),
4198 "os_hvp": os_hvp,
4199 "beparams": cluster.beparams,
4200 "osparams": cluster.osparams,
4201 "nicparams": cluster.nicparams,
4202 "candidate_pool_size": cluster.candidate_pool_size,
4203 "master_netdev": cluster.master_netdev,
4204 "volume_group_name": cluster.volume_group_name,
4205 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4206 "file_storage_dir": cluster.file_storage_dir,
4207 "maintain_node_health": cluster.maintain_node_health,
4208 "ctime": cluster.ctime,
4209 "mtime": cluster.mtime,
4210 "uuid": cluster.uuid,
4211 "tags": list(cluster.GetTags()),
4212 "uid_pool": cluster.uid_pool,
4213 "default_iallocator": cluster.default_iallocator,
4214 "reserved_lvs": cluster.reserved_lvs,
4215 "primary_ip_version": primary_ip_version,
4216 }
4218 return result
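# Editor's note: a toy illustration of the os_hvp filtering above
# (hypothetical data, not part of the module). Given
#   cluster.os_hvp = {"debian": {"kvm": {...}, "xen-pvm": {...}}}
#   cluster.enabled_hypervisors = ["kvm"]
# the loop keeps only {"debian": {"kvm": {...}}}, so parameters of disabled
# hypervisors never appear in the query result.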
4221 class LUQueryConfigValues(NoHooksLU):
4222 """Return configuration values.
4224 """
4225 _OP_PARAMS = [_POutputFields]
4226 REQ_BGL = False
4227 _FIELDS_DYNAMIC = utils.FieldSet()
4228 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4229 "watcher_pause")
4231 def CheckArguments(self):
4232 _CheckOutputFields(static=self._FIELDS_STATIC,
4233 dynamic=self._FIELDS_DYNAMIC,
4234 selected=self.op.output_fields)
4236 def ExpandNames(self):
4237 self.needed_locks = {}
4239 def Exec(self, feedback_fn):
4240 """Dump a representation of the cluster config to the standard output.
4242 """
4243 values = []
4244 for field in self.op.output_fields:
4245 if field == "cluster_name":
4246 entry = self.cfg.GetClusterName()
4247 elif field == "master_node":
4248 entry = self.cfg.GetMasterNode()
4249 elif field == "drain_flag":
4250 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4251 elif field == "watcher_pause":
4252 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4253 else:
4254 raise errors.ParameterError(field)
4255 values.append(entry)
4257 return values
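# Editor's note: a hedged usage sketch (not part of the module): an opcode
# with output_fields=["cluster_name", "drain_flag"] yields e.g.
# ["cluster.example.com", False] -- one entry per requested field, in the
# same order as the request.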
4259 class LUActivateInstanceDisks(NoHooksLU):
4260 """Bring up an instance's disks.
4262 """
4263 _OP_PARAMS = [
4264 _PInstanceName,
4265 ("ignore_size", False, _TBool),
4266 ]
4267 REQ_BGL = False
4269 def ExpandNames(self):
4270 self._ExpandAndLockInstance()
4271 self.needed_locks[locking.LEVEL_NODE] = []
4272 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4274 def DeclareLocks(self, level):
4275 if level == locking.LEVEL_NODE:
4276 self._LockInstancesNodes()
4278 def CheckPrereq(self):
4279 """Check prerequisites.
4281 This checks that the instance is in the cluster.
4284 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4285 assert self.instance is not None, \
4286 "Cannot retrieve locked instance %s" % self.op.instance_name
4287 _CheckNodeOnline(self, self.instance.primary_node)
4289 def Exec(self, feedback_fn):
4290 """Activate the disks.
4293 disks_ok, disks_info = \
4294 _AssembleInstanceDisks(self, self.instance,
4295 ignore_size=self.op.ignore_size)
4296 if not disks_ok:
4297 raise errors.OpExecError("Cannot activate block devices")
4299 return disks_info
4302 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4303 ignore_size=False):
4304 """Prepare the block devices for an instance.
4306 This sets up the block devices on all nodes.
4308 @type lu: L{LogicalUnit}
4309 @param lu: the logical unit on whose behalf we execute
4310 @type instance: L{objects.Instance}
4311 @param instance: the instance for whose disks we assemble
4312 @type disks: list of L{objects.Disk} or None
4313 @param disks: which disks to assemble (or all, if None)
4314 @type ignore_secondaries: boolean
4315 @param ignore_secondaries: if true, errors on secondary nodes
4316 won't result in an error return from the function
4317 @type ignore_size: boolean
4318 @param ignore_size: if true, the current known size of the disk
4319 will not be used during the disk activation, useful for cases
4320 when the size is wrong
4321 @return: False if the operation failed, otherwise a list of
4322 (host, instance_visible_name, node_visible_name)
4323 with the mapping from node devices to instance devices
4325 """
4326 device_info = []
4327 disks_ok = True
4328 iname = instance.name
4329 disks = _ExpandCheckDisks(instance, disks)
4331 # With the two passes mechanism we try to reduce the window of
4332 # opportunity for the race condition of switching DRBD to primary
4333 # before handshaking occured, but we do not eliminate it
4335 # The proper fix would be to wait (with some limits) until the
4336 # connection has been made and drbd transitions from WFConnection
4337 # into any other network-connected state (Connected, SyncTarget,
4340 # 1st pass, assemble on all nodes in secondary mode
4341 for inst_disk in disks:
4342 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4343 if ignore_size:
4344 node_disk = node_disk.Copy()
4345 node_disk.UnsetSize()
4346 lu.cfg.SetDiskID(node_disk, node)
4347 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4348 msg = result.fail_msg
4349 if msg:
4350 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4351 " (is_primary=False, pass=1): %s",
4352 inst_disk.iv_name, node, msg)
4353 if not ignore_secondaries:
4354 disks_ok = False
4356 # FIXME: race condition on drbd migration to primary
4358 # 2nd pass, do only the primary node
4359 for inst_disk in disks:
4362 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4363 if node != instance.primary_node:
4364 continue
4365 if ignore_size:
4366 node_disk = node_disk.Copy()
4367 node_disk.UnsetSize()
4368 lu.cfg.SetDiskID(node_disk, node)
4369 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4370 msg = result.fail_msg
4371 if msg:
4372 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4373 " (is_primary=True, pass=2): %s",
4374 inst_disk.iv_name, node, msg)
4375 disks_ok = False
4376 else:
4377 dev_path = result.payload
4379 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4381 # leave the disks configured for the primary node
4382 # this is a workaround that would be fixed better by
4383 # improving the logical/physical id handling
4384 for disk in disks:
4385 lu.cfg.SetDiskID(disk, instance.primary_node)
4387 return disks_ok, device_info
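# Editor's note: a minimal caller sketch for _AssembleInstanceDisks,
# mirroring _StartInstanceDisks below (hypothetical, not part of the module):
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("%s/%s visible at %s", node, iv_name, dev_path)
#
# The two-pass assembly matters for DRBD: all secondaries must be assembled
# before any node is switched to primary, or the handshake can race.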
4390 def _StartInstanceDisks(lu, instance, force):
4391 """Start the disks of an instance.
4394 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4395 ignore_secondaries=force)
4396 if not disks_ok:
4397 _ShutdownInstanceDisks(lu, instance)
4398 if force is not None and not force:
4399 lu.proc.LogWarning("", hint="If the message above refers to a"
4400 " secondary node,"
4401 " you can retry the operation using '--force'.")
4402 raise errors.OpExecError("Disk consistency error")
4405 class LUDeactivateInstanceDisks(NoHooksLU):
4406 """Shut down an instance's disks.
4408 """
4409 _OP_PARAMS = [_PInstanceName]
4410 REQ_BGL = False
4414 def ExpandNames(self):
4415 self._ExpandAndLockInstance()
4416 self.needed_locks[locking.LEVEL_NODE] = []
4417 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4419 def DeclareLocks(self, level):
4420 if level == locking.LEVEL_NODE:
4421 self._LockInstancesNodes()
4423 def CheckPrereq(self):
4424 """Check prerequisites.
4426 This checks that the instance is in the cluster.
4429 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4430 assert self.instance is not None, \
4431 "Cannot retrieve locked instance %s" % self.op.instance_name
4433 def Exec(self, feedback_fn):
4434 """Deactivate the disks
4437 instance = self.instance
4438 _SafeShutdownInstanceDisks(self, instance)
4441 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4442 """Shutdown block devices of an instance.
4444 This function checks if an instance is running, before calling
4445 _ShutdownInstanceDisks.
4448 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4449 _ShutdownInstanceDisks(lu, instance, disks=disks)
4452 def _ExpandCheckDisks(instance, disks):
4453 """Return the instance disks selected by the disks list
4455 @type disks: list of L{objects.Disk} or None
4456 @param disks: selected disks
4457 @rtype: list of L{objects.Disk}
4458 @return: selected instance disks to act on
4460 """
4461 if disks is None:
4462 return instance.disks
4464 if not set(disks).issubset(instance.disks):
4465 raise errors.ProgrammerError("Can only act on disks belonging to the"
4466 " target instance")
4467 return disks
4470 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4471 """Shutdown block devices of an instance.
4473 This does the shutdown on all nodes of the instance.
4475 If ignore_primary is false, errors on the primary node are
4476 ignored.
4478 """
4479 all_result = True
4480 disks = _ExpandCheckDisks(instance, disks)
4482 for disk in disks:
4483 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4484 lu.cfg.SetDiskID(top_disk, node)
4485 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4486 msg = result.fail_msg
4487 if msg:
4488 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4489 disk.iv_name, node, msg)
4490 if not ignore_primary or node != instance.primary_node:
4491 all_result = False
4493 return all_result
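# Editor's note: assemble and shutdown are symmetric; a hedged sketch of the
# usual pairing (hypothetical, not part of the module):
#
#   disks_ok, _ = _AssembleInstanceDisks(lu, instance)
#   try:
#     pass  # ... use the block devices ...
#   finally:
#     if not _ShutdownInstanceDisks(lu, instance):
#       lu.LogWarning("Some block devices failed to shut down cleanly")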
4495 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4496 """Checks if a node has enough free memory.
4498 This function checks if a given node has the needed amount of free
4499 memory. In case the node has less memory or we cannot get the
4500 information from the node, this function raises an OpPrereqError
4501 exception.
4503 @type lu: C{LogicalUnit}
4504 @param lu: a logical unit from which we get configuration data
4505 @type node: C{str}
4506 @param node: the node to check
4507 @type reason: C{str}
4508 @param reason: string to use in the error message
4509 @type requested: C{int}
4510 @param requested: the amount of memory in MiB to check for
4511 @type hypervisor_name: C{str}
4512 @param hypervisor_name: the hypervisor to ask for memory stats
4513 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4514 we cannot check the node
4516 """
4517 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4518 nodeinfo[node].Raise("Can't get data from node %s" % node,
4519 prereq=True, ecode=errors.ECODE_ENVIRON)
4520 free_mem = nodeinfo[node].payload.get('memory_free', None)
4521 if not isinstance(free_mem, int):
4522 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4523 " was '%s'" % (node, free_mem),
4524 errors.ECODE_ENVIRON)
4525 if requested > free_mem:
4526 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4527 " needed %s MiB, available %s MiB" %
4528 (node, reason, requested, free_mem),
4529 errors.ECODE_NORES)
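# Editor's note: a hedged usage sketch for this helper and the disk variant
# below (hypothetical values, not part of the module); both raise
# OpPrereqError on shortage instead of returning a value, so callers simply
# invoke them from CheckPrereq:
#
#   _CheckNodeFreeMemory(self, target_node,
#                        "failing over instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
#   _CheckNodesFreeDisk(self, [node_a, node_b], 2048)  # 2048 MiB required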
4532 def _CheckNodesFreeDisk(lu, nodenames, requested):
4533 """Checks if nodes have enough free disk space in the default VG.
4535 This function checks if all given nodes have the needed amount of
4536 free disk. In case any node has less disk or we cannot get the
4537 information from the node, this function raises an OpPrereqError
4538 exception.
4540 @type lu: C{LogicalUnit}
4541 @param lu: a logical unit from which we get configuration data
4542 @type nodenames: C{list}
4543 @param nodenames: the list of node names to check
4544 @type requested: C{int}
4545 @param requested: the amount of disk in MiB to check for
4546 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4547 we cannot check the node
4549 """
4550 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4551 lu.cfg.GetHypervisorType())
4552 for node in nodenames:
4553 info = nodeinfo[node]
4554 info.Raise("Cannot get current information from node %s" % node,
4555 prereq=True, ecode=errors.ECODE_ENVIRON)
4556 vg_free = info.payload.get("vg_free", None)
4557 if not isinstance(vg_free, int):
4558 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4559 " result was '%s'" % (node, vg_free),
4560 errors.ECODE_ENVIRON)
4561 if requested > vg_free:
4562 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4563 " required %d MiB, available %d MiB" %
4564 (node, requested, vg_free),
4565 errors.ECODE_NORES)
4568 class LUStartupInstance(LogicalUnit):
4569 """Starts an instance.
4571 """
4572 HPATH = "instance-start"
4573 HTYPE = constants.HTYPE_INSTANCE
4574 _OP_PARAMS = [
4575 _PInstanceName,
4576 ("force", False, _TBool),
4577 ("hvparams", _EmptyDict, _TDict),
4578 ("beparams", _EmptyDict, _TDict),
4579 ]
4580 REQ_BGL = False
4582 def CheckArguments(self):
4584 if self.op.beparams:
4585 # fill the beparams dict
4586 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4588 def ExpandNames(self):
4589 self._ExpandAndLockInstance()
4591 def BuildHooksEnv(self):
4592 """Build hooks env.
4594 This runs on master, primary and secondary nodes of the instance.
4596 """
4597 env = {
4598 "FORCE": self.op.force,
4599 }
4600 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4601 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4602 return env, nl, nl
4604 def CheckPrereq(self):
4605 """Check prerequisites.
4607 This checks that the instance is in the cluster.
4610 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4611 assert self.instance is not None, \
4612 "Cannot retrieve locked instance %s" % self.op.instance_name
4615 if self.op.hvparams:
4616 # check hypervisor parameter syntax (locally)
4617 cluster = self.cfg.GetClusterInfo()
4618 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4619 filled_hvp = cluster.FillHV(instance)
4620 filled_hvp.update(self.op.hvparams)
4621 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4622 hv_type.CheckParameterSyntax(filled_hvp)
4623 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4625 _CheckNodeOnline(self, instance.primary_node)
4627 bep = self.cfg.GetClusterInfo().FillBE(instance)
4628 # check bridges existence
4629 _CheckInstanceBridgesExist(self, instance)
4631 remote_info = self.rpc.call_instance_info(instance.primary_node,
4632 instance.name,
4633 instance.hypervisor)
4634 remote_info.Raise("Error checking node %s" % instance.primary_node,
4635 prereq=True, ecode=errors.ECODE_ENVIRON)
4636 if not remote_info.payload: # not running already
4637 _CheckNodeFreeMemory(self, instance.primary_node,
4638 "starting instance %s" % instance.name,
4639 bep[constants.BE_MEMORY], instance.hypervisor)
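# Editor's note: parameter precedence in the hvparams check above, as a toy
# sketch (hypothetical values, not part of the module):
#
#   filled_hvp = cluster.FillHV(instance)  # cluster/os/instance levels
#   filled_hvp.update(self.op.hvparams)    # per-startup overrides win
#
# e.g. a cluster-level kernel_path of "/boot/vmlinuz" combined with
# op.hvparams {"kernel_path": "/boot/test"} starts the instance with
# "/boot/test", without changing the stored configuration.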
4641 def Exec(self, feedback_fn):
4642 """Start the instance.
4645 instance = self.instance
4646 force = self.op.force
4648 self.cfg.MarkInstanceUp(instance.name)
4650 node_current = instance.primary_node
4652 _StartInstanceDisks(self, instance, force)
4654 result = self.rpc.call_instance_start(node_current, instance,
4655 self.op.hvparams, self.op.beparams)
4656 msg = result.fail_msg
4657 if msg:
4658 _ShutdownInstanceDisks(self, instance)
4659 raise errors.OpExecError("Could not start instance: %s" % msg)
4662 class LURebootInstance(LogicalUnit):
4663 """Reboot an instance.
4665 """
4666 HPATH = "instance-reboot"
4667 HTYPE = constants.HTYPE_INSTANCE
4668 _OP_PARAMS = [
4669 _PInstanceName,
4670 ("ignore_secondaries", False, _TBool),
4671 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4672 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4673 ]
4674 REQ_BGL = False
4676 def ExpandNames(self):
4677 self._ExpandAndLockInstance()
4679 def BuildHooksEnv(self):
4680 """Build hooks env.
4682 This runs on master, primary and secondary nodes of the instance.
4684 """
4685 env = {
4686 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4687 "REBOOT_TYPE": self.op.reboot_type,
4688 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4689 }
4690 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4691 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4692 return env, nl, nl
4694 def CheckPrereq(self):
4695 """Check prerequisites.
4697 This checks that the instance is in the cluster.
4700 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4701 assert self.instance is not None, \
4702 "Cannot retrieve locked instance %s" % self.op.instance_name
4704 _CheckNodeOnline(self, instance.primary_node)
4706 # check bridges existence
4707 _CheckInstanceBridgesExist(self, instance)
4709 def Exec(self, feedback_fn):
4710 """Reboot the instance.
4713 instance = self.instance
4714 ignore_secondaries = self.op.ignore_secondaries
4715 reboot_type = self.op.reboot_type
4717 node_current = instance.primary_node
4719 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4720 constants.INSTANCE_REBOOT_HARD]:
4721 for disk in instance.disks:
4722 self.cfg.SetDiskID(disk, node_current)
4723 result = self.rpc.call_instance_reboot(node_current, instance,
4724 reboot_type,
4725 self.op.shutdown_timeout)
4726 result.Raise("Could not reboot instance")
4727 else:
4728 result = self.rpc.call_instance_shutdown(node_current, instance,
4729 self.op.shutdown_timeout)
4730 result.Raise("Could not shutdown instance for full reboot")
4731 _ShutdownInstanceDisks(self, instance)
4732 _StartInstanceDisks(self, instance, ignore_secondaries)
4733 result = self.rpc.call_instance_start(node_current, instance, None, None)
4734 msg = result.fail_msg
4735 if msg:
4736 _ShutdownInstanceDisks(self, instance)
4737 raise errors.OpExecError("Could not start instance for"
4738 " full reboot: %s" % msg)
4740 self.cfg.MarkInstanceUp(instance.name)
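# Editor's note: a condensed sketch of the reboot dispatch above
# (hypothetical shorthand, not part of the module):
#
#   if reboot_type in (constants.INSTANCE_REBOOT_SOFT,
#                      constants.INSTANCE_REBOOT_HARD):
#     self.rpc.call_instance_reboot(...)   # in-hypervisor reboot
#   else:                                  # full reboot
#     self.rpc.call_instance_shutdown(...)
#     _ShutdownInstanceDisks(...); _StartInstanceDisks(...)
#     self.rpc.call_instance_start(...)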
4743 class LUShutdownInstance(LogicalUnit):
4744 """Shut down an instance.
4746 """
4747 HPATH = "instance-stop"
4748 HTYPE = constants.HTYPE_INSTANCE
4749 _OP_PARAMS = [
4750 _PInstanceName,
4751 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4752 ]
4753 REQ_BGL = False
4755 def ExpandNames(self):
4756 self._ExpandAndLockInstance()
4758 def BuildHooksEnv(self):
4759 """Build hooks env.
4761 This runs on master, primary and secondary nodes of the instance.
4763 """
4764 env = _BuildInstanceHookEnvByObject(self, self.instance)
4765 env["TIMEOUT"] = self.op.timeout
4766 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4767 return env, nl, nl
4769 def CheckPrereq(self):
4770 """Check prerequisites.
4772 This checks that the instance is in the cluster.
4775 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4776 assert self.instance is not None, \
4777 "Cannot retrieve locked instance %s" % self.op.instance_name
4778 _CheckNodeOnline(self, self.instance.primary_node)
4780 def Exec(self, feedback_fn):
4781 """Shutdown the instance.
4784 instance = self.instance
4785 node_current = instance.primary_node
4786 timeout = self.op.timeout
4787 self.cfg.MarkInstanceDown(instance.name)
4788 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4789 msg = result.fail_msg
4790 if msg:
4791 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4793 _ShutdownInstanceDisks(self, instance)
4796 class LUReinstallInstance(LogicalUnit):
4797 """Reinstall an instance.
4799 """
4800 HPATH = "instance-reinstall"
4801 HTYPE = constants.HTYPE_INSTANCE
4802 _OP_PARAMS = [
4803 _PInstanceName,
4804 ("os_type", None, _TMaybeString),
4805 ("force_variant", False, _TBool),
4806 ]
4807 REQ_BGL = False
4809 def ExpandNames(self):
4810 self._ExpandAndLockInstance()
4812 def BuildHooksEnv(self):
4813 """Build hooks env.
4815 This runs on master, primary and secondary nodes of the instance.
4817 """
4818 env = _BuildInstanceHookEnvByObject(self, self.instance)
4819 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4820 return env, nl, nl
4822 def CheckPrereq(self):
4823 """Check prerequisites.
4825 This checks that the instance is in the cluster and is not running.
4828 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4829 assert instance is not None, \
4830 "Cannot retrieve locked instance %s" % self.op.instance_name
4831 _CheckNodeOnline(self, instance.primary_node)
4833 if instance.disk_template == constants.DT_DISKLESS:
4834 raise errors.OpPrereqError("Instance '%s' has no disks" %
4835 self.op.instance_name,
4836 errors.ECODE_INVAL)
4837 _CheckInstanceDown(self, instance, "cannot reinstall")
4839 if self.op.os_type is not None:
4841 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4842 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4844 self.instance = instance
4846 def Exec(self, feedback_fn):
4847 """Reinstall the instance.
4850 inst = self.instance
4852 if self.op.os_type is not None:
4853 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4854 inst.os = self.op.os_type
4855 self.cfg.Update(inst, feedback_fn)
4857 _StartInstanceDisks(self, inst, None)
4858 try:
4859 feedback_fn("Running the instance OS create scripts...")
4860 # FIXME: pass debug option from opcode to backend
4861 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4862 self.op.debug_level)
4863 result.Raise("Could not install OS for instance %s on node %s" %
4864 (inst.name, inst.primary_node))
4865 finally:
4866 _ShutdownInstanceDisks(self, inst)
4869 class LURecreateInstanceDisks(LogicalUnit):
4870 """Recreate an instance's missing disks.
4872 """
4873 HPATH = "instance-recreate-disks"
4874 HTYPE = constants.HTYPE_INSTANCE
4875 _OP_PARAMS = [
4876 _PInstanceName,
4877 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4878 ]
4879 REQ_BGL = False
4881 def ExpandNames(self):
4882 self._ExpandAndLockInstance()
4884 def BuildHooksEnv(self):
4885 """Build hooks env.
4887 This runs on master, primary and secondary nodes of the instance.
4889 """
4890 env = _BuildInstanceHookEnvByObject(self, self.instance)
4891 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4892 return env, nl, nl
4894 def CheckPrereq(self):
4895 """Check prerequisites.
4897 This checks that the instance is in the cluster and is not running.
4900 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4901 assert instance is not None, \
4902 "Cannot retrieve locked instance %s" % self.op.instance_name
4903 _CheckNodeOnline(self, instance.primary_node)
4905 if instance.disk_template == constants.DT_DISKLESS:
4906 raise errors.OpPrereqError("Instance '%s' has no disks" %
4907 self.op.instance_name, errors.ECODE_INVAL)
4908 _CheckInstanceDown(self, instance, "cannot recreate disks")
4910 if not self.op.disks:
4911 self.op.disks = range(len(instance.disks))
4913 for idx in self.op.disks:
4914 if idx >= len(instance.disks):
4915 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4916 errors.ECODE_INVAL)
4918 self.instance = instance
4920 def Exec(self, feedback_fn):
4921 """Recreate the disks.
4923 """
4924 to_skip = []
4925 for idx, _ in enumerate(self.instance.disks):
4926 if idx not in self.op.disks: # disk idx has not been passed in
4927 to_skip.append(idx)
4930 _CreateDisks(self, self.instance, to_skip=to_skip)
4933 class LURenameInstance(LogicalUnit):
4934 """Rename an instance.
4936 """
4937 HPATH = "instance-rename"
4938 HTYPE = constants.HTYPE_INSTANCE
4939 _OP_PARAMS = [
4940 _PInstanceName,
4941 ("new_name", _NoDefault, _TNonEmptyString),
4942 ("ip_check", False, _TBool),
4943 ("name_check", True, _TBool),
4944 ]
4946 def CheckArguments(self):
4950 if self.op.ip_check and not self.op.name_check:
4951 # TODO: make the ip check more flexible and not depend on the name check
4952 raise errors.OpPrereqError("Cannot do ip check without a name check",
4953 errors.ECODE_INVAL)
4955 def BuildHooksEnv(self):
4956 """Build hooks env.
4958 This runs on master, primary and secondary nodes of the instance.
4960 """
4961 env = _BuildInstanceHookEnvByObject(self, self.instance)
4962 env["INSTANCE_NEW_NAME"] = self.op.new_name
4963 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4964 return env, nl, nl
4966 def CheckPrereq(self):
4967 """Check prerequisites.
4969 This checks that the instance is in the cluster and is not running.
4972 self.op.instance_name = _ExpandInstanceName(self.cfg,
4973 self.op.instance_name)
4974 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4975 assert instance is not None
4976 _CheckNodeOnline(self, instance.primary_node)
4977 _CheckInstanceDown(self, instance, "cannot rename")
4978 self.instance = instance
4980 new_name = self.op.new_name
4981 if self.op.name_check:
4982 hostname = netutils.GetHostname(name=new_name)
4983 new_name = self.op.new_name = hostname.name
4984 if (self.op.ip_check and
4985 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4986 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4987 (hostname.ip, new_name),
4988 errors.ECODE_NOTUNIQUE)
4990 instance_list = self.cfg.GetInstanceList()
4991 if new_name in instance_list:
4992 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4993 new_name, errors.ECODE_EXISTS)
4995 def Exec(self, feedback_fn):
4996 """Rename the instance.
4998 """
4999 inst = self.instance
5000 old_name = inst.name
5002 if inst.disk_template == constants.DT_FILE:
5003 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5005 self.cfg.RenameInstance(inst.name, self.op.new_name)
5006 # Change the instance lock. This is definitely safe while we hold the BGL
5007 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5008 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5010 # re-read the instance from the configuration after rename
5011 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5013 if inst.disk_template == constants.DT_FILE:
5014 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5015 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5016 old_file_storage_dir,
5017 new_file_storage_dir)
5018 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5019 " (but the instance has been renamed in Ganeti)" %
5020 (inst.primary_node, old_file_storage_dir,
5021 new_file_storage_dir))
5023 _StartInstanceDisks(self, inst, None)
5024 try:
5025 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5026 old_name, self.op.debug_level)
5027 msg = result.fail_msg
5028 if msg:
5029 msg = ("Could not run OS rename script for instance %s on node %s"
5030 " (but the instance has been renamed in Ganeti): %s" %
5031 (inst.name, inst.primary_node, msg))
5032 self.proc.LogWarning(msg)
5033 finally:
5034 _ShutdownInstanceDisks(self, inst)
5036 return inst.name
5039 class LURemoveInstance(LogicalUnit):
5040 """Remove an instance.
5042 """
5043 HPATH = "instance-remove"
5044 HTYPE = constants.HTYPE_INSTANCE
5045 _OP_PARAMS = [
5046 _PInstanceName,
5047 ("ignore_failures", False, _TBool),
5048 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
5049 ]
5050 REQ_BGL = False
5052 def ExpandNames(self):
5053 self._ExpandAndLockInstance()
5054 self.needed_locks[locking.LEVEL_NODE] = []
5055 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5057 def DeclareLocks(self, level):
5058 if level == locking.LEVEL_NODE:
5059 self._LockInstancesNodes()
5061 def BuildHooksEnv(self):
5062 """Build hooks env.
5064 This runs on master, primary and secondary nodes of the instance.
5066 """
5067 env = _BuildInstanceHookEnvByObject(self, self.instance)
5068 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5069 nl = [self.cfg.GetMasterNode()]
5070 nl_post = list(self.instance.all_nodes) + nl
5071 return env, nl, nl_post
5073 def CheckPrereq(self):
5074 """Check prerequisites.
5076 This checks that the instance is in the cluster.
5079 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5080 assert self.instance is not None, \
5081 "Cannot retrieve locked instance %s" % self.op.instance_name
5083 def Exec(self, feedback_fn):
5084 """Remove the instance.
5087 instance = self.instance
5088 logging.info("Shutting down instance %s on node %s",
5089 instance.name, instance.primary_node)
5091 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5092 self.op.shutdown_timeout)
5093 msg = result.fail_msg
5094 if msg:
5095 if self.op.ignore_failures:
5096 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5097 else:
5098 raise errors.OpExecError("Could not shutdown instance %s on"
5099 " node %s: %s" %
5100 (instance.name, instance.primary_node, msg))
5102 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5105 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5106 """Utility function to remove an instance.
5109 logging.info("Removing block devices for instance %s", instance.name)
5111 if not _RemoveDisks(lu, instance):
5112 if not ignore_failures:
5113 raise errors.OpExecError("Can't remove instance's disks")
5114 feedback_fn("Warning: can't remove instance's disks")
5116 logging.info("Removing instance %s out of cluster config", instance.name)
5118 lu.cfg.RemoveInstance(instance.name)
5120 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5121 "Instance lock removal conflict"
5123 # Remove lock for the instance
5124 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5127 class LUQueryInstances(NoHooksLU):
5128 """Logical unit for querying instances.
5130 """
5131 # pylint: disable-msg=W0142
5132 _OP_PARAMS = [
5133 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5134 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5135 ("use_locking", False, _TBool),
5136 ]
5137 REQ_BGL = False
5138 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5139 "serial_no", "ctime", "mtime", "uuid"]
5140 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5141 "admin_state",
5142 "disk_template", "ip", "mac", "bridge",
5143 "nic_mode", "nic_link",
5144 "sda_size", "sdb_size", "vcpus", "tags",
5145 "network_port", "beparams",
5146 r"(disk)\.(size)/([0-9]+)",
5147 r"(disk)\.(sizes)", "disk_usage",
5148 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5149 r"(nic)\.(bridge)/([0-9]+)",
5150 r"(nic)\.(macs|ips|modes|links|bridges)",
5151 r"(disk|nic)\.(count)",
5152 "hvparams",
5153 ] + _SIMPLE_FIELDS +
5154 ["hv/%s" % name
5155 for name in constants.HVS_PARAMETERS
5156 if name not in constants.HVC_GLOBALS] +
5157 ["be/%s" % name
5158 for name in constants.BES_PARAMETERS])
5159 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5160 "oper_ram",
5161 "oper_vcpus",
5162 "status")
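# Editor's note: a tiny sketch of how the regex-style static fields above are
# consumed in Exec below (hypothetical, not part of the module):
#
#   st_match = self._FIELDS_STATIC.Matches("disk.size/0")
#   st_match.groups()  # ("disk", "size", "0") for r"(disk)\.(size)/([0-9]+)"
#
# The group tuple drives the "variable list" branch, which selects a single
# disk or NIC by index.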
5165 def CheckArguments(self):
5166 _CheckOutputFields(static=self._FIELDS_STATIC,
5167 dynamic=self._FIELDS_DYNAMIC,
5168 selected=self.op.output_fields)
5170 def ExpandNames(self):
5171 self.needed_locks = {}
5172 self.share_locks[locking.LEVEL_INSTANCE] = 1
5173 self.share_locks[locking.LEVEL_NODE] = 1
5175 if self.op.names:
5176 self.wanted = _GetWantedInstances(self, self.op.names)
5177 else:
5178 self.wanted = locking.ALL_SET
5180 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5181 self.do_locking = self.do_node_query and self.op.use_locking
5182 if self.do_locking:
5183 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5184 self.needed_locks[locking.LEVEL_NODE] = []
5185 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5187 def DeclareLocks(self, level):
5188 if level == locking.LEVEL_NODE and self.do_locking:
5189 self._LockInstancesNodes()
5191 def Exec(self, feedback_fn):
5192 """Computes the list of nodes and their attributes.
5195 # pylint: disable-msg=R0912
5196 # way too many branches here
5197 all_info = self.cfg.GetAllInstancesInfo()
5198 if self.wanted == locking.ALL_SET:
5199 # caller didn't specify instance names, so ordering is not important
5200 if self.do_locking:
5201 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5202 else:
5203 instance_names = all_info.keys()
5204 instance_names = utils.NiceSort(instance_names)
5205 else:
5206 # caller did specify names, so we must keep the ordering
5207 if self.do_locking:
5208 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5209 else:
5210 tgt_set = all_info.keys()
5211 missing = set(self.wanted).difference(tgt_set)
5212 if missing:
5213 raise errors.OpExecError("Some instances were removed before"
5214 " retrieving their data: %s" % missing)
5215 instance_names = self.wanted
5217 instance_list = [all_info[iname] for iname in instance_names]
5219 # begin data gathering
5221 nodes = frozenset([inst.primary_node for inst in instance_list])
5222 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5224 bad_nodes = []
5225 off_nodes = []
5226 if self.do_node_query:
5227 live_data = {}
5228 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5229 for name in nodes:
5230 result = node_data[name]
5231 if result.offline:
5232 # offline nodes will be in both lists
5233 off_nodes.append(name)
5234 if result.fail_msg:
5235 bad_nodes.append(name)
5236 else:
5237 if result.payload:
5238 live_data.update(result.payload)
5239 # else no instance is alive
5240 else:
5241 live_data = dict([(name, {}) for name in instance_names])
5243 # end data gathering
5245 HVPREFIX = "hv/"
5246 BEPREFIX = "be/"
5247 output = []
5248 cluster = self.cfg.GetClusterInfo()
5249 for instance in instance_list:
5250 iout = []
5251 i_hv = cluster.FillHV(instance, skip_globals=True)
5252 i_be = cluster.FillBE(instance)
5253 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5254 for field in self.op.output_fields:
5255 st_match = self._FIELDS_STATIC.Matches(field)
5256 if field in self._SIMPLE_FIELDS:
5257 val = getattr(instance, field)
5258 elif field == "pnode":
5259 val = instance.primary_node
5260 elif field == "snodes":
5261 val = list(instance.secondary_nodes)
5262 elif field == "admin_state":
5263 val = instance.admin_up
5264 elif field == "oper_state":
5265 if instance.primary_node in bad_nodes:
5266 val = None
5267 else:
5268 val = bool(live_data.get(instance.name))
5269 elif field == "status":
5270 if instance.primary_node in off_nodes:
5271 val = "ERROR_nodeoffline"
5272 elif instance.primary_node in bad_nodes:
5273 val = "ERROR_nodedown"
5274 else:
5275 running = bool(live_data.get(instance.name))
5276 if running:
5277 if instance.admin_up:
5278 val = "running"
5279 else:
5280 val = "ERROR_up"
5281 else:
5282 if instance.admin_up:
5283 val = "ERROR_down"
5284 else:
5285 val = "ADMIN_down"
5286 elif field == "oper_ram":
5287 if instance.primary_node in bad_nodes:
5288 val = None
5289 elif instance.name in live_data:
5290 val = live_data[instance.name].get("memory", "?")
5291 else:
5292 val = "-"
5293 elif field == "oper_vcpus":
5294 if instance.primary_node in bad_nodes:
5295 val = None
5296 elif instance.name in live_data:
5297 val = live_data[instance.name].get("vcpus", "?")
5298 else:
5299 val = "-"
5300 elif field == "vcpus":
5301 val = i_be[constants.BE_VCPUS]
5302 elif field == "disk_template":
5303 val = instance.disk_template
5304 elif field == "ip":
5305 if instance.nics:
5306 val = instance.nics[0].ip
5307 else:
5308 val = None
5309 elif field == "nic_mode":
5310 if instance.nics:
5311 val = i_nicp[0][constants.NIC_MODE]
5312 else:
5313 val = None
5314 elif field == "nic_link":
5315 if instance.nics:
5316 val = i_nicp[0][constants.NIC_LINK]
5317 else:
5318 val = None
5319 elif field == "bridge":
5320 if (instance.nics and
5321 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5322 val = i_nicp[0][constants.NIC_LINK]
5323 else:
5324 val = None
5325 elif field == "mac":
5326 if instance.nics:
5327 val = instance.nics[0].mac
5328 else:
5329 val = None
5330 elif field == "sda_size" or field == "sdb_size":
5331 idx = ord(field[2]) - ord('a')
5332 try:
5333 val = instance.FindDisk(idx).size
5334 except errors.OpPrereqError:
5335 val = None
5336 elif field == "disk_usage": # total disk usage per node
5337 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5338 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5339 elif field == "tags":
5340 val = list(instance.GetTags())
5341 elif field == "hvparams":
5342 val = i_hv
5343 elif (field.startswith(HVPREFIX) and
5344 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5345 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5346 val = i_hv.get(field[len(HVPREFIX):], None)
5347 elif field == "beparams":
5348 val = i_be
5349 elif (field.startswith(BEPREFIX) and
5350 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5351 val = i_be.get(field[len(BEPREFIX):], None)
5352 elif st_match and st_match.groups():
5353 # matches a variable list
5354 st_groups = st_match.groups()
5355 if st_groups and st_groups[0] == "disk":
5356 if st_groups[1] == "count":
5357 val = len(instance.disks)
5358 elif st_groups[1] == "sizes":
5359 val = [disk.size for disk in instance.disks]
5360 elif st_groups[1] == "size":
5361 try:
5362 val = instance.FindDisk(st_groups[2]).size
5363 except errors.OpPrereqError:
5364 val = None
5365 else:
5366 assert False, "Unhandled disk parameter"
5367 elif st_groups[0] == "nic":
5368 if st_groups[1] == "count":
5369 val = len(instance.nics)
5370 elif st_groups[1] == "macs":
5371 val = [nic.mac for nic in instance.nics]
5372 elif st_groups[1] == "ips":
5373 val = [nic.ip for nic in instance.nics]
5374 elif st_groups[1] == "modes":
5375 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5376 elif st_groups[1] == "links":
5377 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5378 elif st_groups[1] == "bridges":
5379 val = []
5380 for nicp in i_nicp:
5381 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5382 val.append(nicp[constants.NIC_LINK])
5383 else:
5384 val.append(None)
5385 else:
5386 # index-based items
5387 nic_idx = int(st_groups[2])
5388 if nic_idx >= len(instance.nics):
5389 val = None
5390 else:
5391 if st_groups[1] == "mac":
5392 val = instance.nics[nic_idx].mac
5393 elif st_groups[1] == "ip":
5394 val = instance.nics[nic_idx].ip
5395 elif st_groups[1] == "mode":
5396 val = i_nicp[nic_idx][constants.NIC_MODE]
5397 elif st_groups[1] == "link":
5398 val = i_nicp[nic_idx][constants.NIC_LINK]
5399 elif st_groups[1] == "bridge":
5400 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5401 if nic_mode == constants.NIC_MODE_BRIDGED:
5402 val = i_nicp[nic_idx][constants.NIC_LINK]
5403 else:
5404 val = None
5405 else:
5406 assert False, "Unhandled NIC parameter"
5407 else:
5408 assert False, ("Declared but unhandled variable parameter '%s'" %
5409 field)
5410 else:
5411 assert False, "Declared but unhandled parameter '%s'" % field
5412 iout.append(val)
5413 output.append(iout)
5415 return output
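# Editor's note: a hedged illustration of the result shape (hypothetical
# data, not part of the module): with output_fields=["name", "oper_state"],
# Exec returns one row per instance, e.g.
#   [["inst1.example.com", True], ["inst2.example.com", False]]
# and dynamic fields degrade to None when the primary node is unreachable.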
5418 class LUFailoverInstance(LogicalUnit):
5419 """Failover an instance.
5421 """
5422 HPATH = "instance-failover"
5423 HTYPE = constants.HTYPE_INSTANCE
5424 _OP_PARAMS = [
5425 _PInstanceName,
5426 ("ignore_consistency", False, _TBool),
5427 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
5428 ]
5429 REQ_BGL = False
5431 def ExpandNames(self):
5432 self._ExpandAndLockInstance()
5433 self.needed_locks[locking.LEVEL_NODE] = []
5434 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5436 def DeclareLocks(self, level):
5437 if level == locking.LEVEL_NODE:
5438 self._LockInstancesNodes()
5440 def BuildHooksEnv(self):
5441 """Build hooks env.
5443 This runs on master, primary and secondary nodes of the instance.
5445 """
5446 instance = self.instance
5447 source_node = instance.primary_node
5448 target_node = instance.secondary_nodes[0]
5449 env = {
5450 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5451 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5452 "OLD_PRIMARY": source_node,
5453 "OLD_SECONDARY": target_node,
5454 "NEW_PRIMARY": target_node,
5455 "NEW_SECONDARY": source_node,
5456 }
5457 env.update(_BuildInstanceHookEnvByObject(self, instance))
5458 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5459 nl_post = list(nl)
5460 nl_post.append(source_node)
5461 return env, nl, nl_post
5463 def CheckPrereq(self):
5464 """Check prerequisites.
5466 This checks that the instance is in the cluster.
5469 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5470 assert self.instance is not None, \
5471 "Cannot retrieve locked instance %s" % self.op.instance_name
5473 bep = self.cfg.GetClusterInfo().FillBE(instance)
5474 if instance.disk_template not in constants.DTS_NET_MIRROR:
5475 raise errors.OpPrereqError("Instance's disk layout is not"
5476 " network mirrored, cannot failover.",
5477 errors.ECODE_STATE)
5479 secondary_nodes = instance.secondary_nodes
5480 if not secondary_nodes:
5481 raise errors.ProgrammerError("no secondary node but using "
5482 "a mirrored disk template")
5484 target_node = secondary_nodes[0]
5485 _CheckNodeOnline(self, target_node)
5486 _CheckNodeNotDrained(self, target_node)
5487 if instance.admin_up:
5488 # check memory requirements on the secondary node
5489 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5490 instance.name, bep[constants.BE_MEMORY],
5491 instance.hypervisor)
5492 else:
5493 self.LogInfo("Not checking memory on the secondary node as"
5494 " instance will not be started")
5496 # check bridge existence
5497 _CheckInstanceBridgesExist(self, instance, node=target_node)
5499 def Exec(self, feedback_fn):
5500 """Failover an instance.
5502 The failover is done by shutting it down on its present node and
5503 starting it on the secondary.
5506 instance = self.instance
5507 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5509 source_node = instance.primary_node
5510 target_node = instance.secondary_nodes[0]
5512 if instance.admin_up:
5513 feedback_fn("* checking disk consistency between source and target")
5514 for dev in instance.disks:
5515 # for drbd, these are drbd over lvm
5516 if not _CheckDiskConsistency(self, dev, target_node, False):
5517 if not self.op.ignore_consistency:
5518 raise errors.OpExecError("Disk %s is degraded on target node,"
5519 " aborting failover." % dev.iv_name)
5520 else:
5521 feedback_fn("* not checking disk consistency as instance is not running")
5523 feedback_fn("* shutting down instance on source node")
5524 logging.info("Shutting down instance %s on node %s",
5525 instance.name, source_node)
5527 result = self.rpc.call_instance_shutdown(source_node, instance,
5528 self.op.shutdown_timeout)
5529 msg = result.fail_msg
5530 if msg:
5531 if self.op.ignore_consistency or primary_node.offline:
5532 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5533 " Proceeding anyway. Please make sure node"
5534 " %s is down. Error details: %s",
5535 instance.name, source_node, source_node, msg)
5536 else:
5537 raise errors.OpExecError("Could not shutdown instance %s on"
5538 " node %s: %s" %
5539 (instance.name, source_node, msg))
5541 feedback_fn("* deactivating the instance's disks on source node")
5542 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5543 raise errors.OpExecError("Can't shut down the instance's disks.")
5545 instance.primary_node = target_node
5546 # distribute new instance config to the other nodes
5547 self.cfg.Update(instance, feedback_fn)
5549 # Only start the instance if it's marked as up
5550 if instance.admin_up:
5551 feedback_fn("* activating the instance's disks on target node")
5552 logging.info("Starting instance %s on node %s",
5553 instance.name, target_node)
5555 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5556 ignore_secondaries=True)
5557 if not disks_ok:
5558 _ShutdownInstanceDisks(self, instance)
5559 raise errors.OpExecError("Can't activate the instance's disks")
5561 feedback_fn("* starting the instance on the target node")
5562 result = self.rpc.call_instance_start(target_node, instance, None, None)
5563 msg = result.fail_msg
5564 if msg:
5565 _ShutdownInstanceDisks(self, instance)
5566 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5567 (instance.name, target_node, msg))
5570 class LUMigrateInstance(LogicalUnit):
5571 """Migrate an instance.
5573 This is migration without shutting down, compared to the failover,
5574 which is done with shutdown.
5577 HPATH = "instance-migrate"
5578 HTYPE = constants.HTYPE_INSTANCE
5583 ("cleanup", False, _TBool),
5588 def ExpandNames(self):
5589 self._ExpandAndLockInstance()
5591 self.needed_locks[locking.LEVEL_NODE] = []
5592 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5594 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5595 self.op.cleanup)
5596 self.tasklets = [self._migrater]
5598 def DeclareLocks(self, level):
5599 if level == locking.LEVEL_NODE:
5600 self._LockInstancesNodes()
5602 def BuildHooksEnv(self):
5603 """Build hooks env.
5605 This runs on master, primary and secondary nodes of the instance.
5607 """
5608 instance = self._migrater.instance
5609 source_node = instance.primary_node
5610 target_node = instance.secondary_nodes[0]
5611 env = _BuildInstanceHookEnvByObject(self, instance)
5612 env["MIGRATE_LIVE"] = self._migrater.live
5613 env["MIGRATE_CLEANUP"] = self.op.cleanup
5614 env.update({
5615 "OLD_PRIMARY": source_node,
5616 "OLD_SECONDARY": target_node,
5617 "NEW_PRIMARY": target_node,
5618 "NEW_SECONDARY": source_node,
5619 })
5620 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5621 nl_post = list(nl)
5622 nl_post.append(source_node)
5623 return env, nl, nl_post
5626 class LUMoveInstance(LogicalUnit):
5627 """Move an instance by data-copying.
5629 """
5630 HPATH = "instance-move"
5631 HTYPE = constants.HTYPE_INSTANCE
5632 _OP_PARAMS = [
5633 _PInstanceName,
5634 ("target_node", _NoDefault, _TNonEmptyString),
5635 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
5636 ]
5637 REQ_BGL = False
5639 def ExpandNames(self):
5640 self._ExpandAndLockInstance()
5641 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5642 self.op.target_node = target_node
5643 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5644 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5646 def DeclareLocks(self, level):
5647 if level == locking.LEVEL_NODE:
5648 self._LockInstancesNodes(primary_only=True)
5650 def BuildHooksEnv(self):
5651 """Build hooks env.
5653 This runs on master, primary and secondary nodes of the instance.
5655 """
5656 env = {
5657 "TARGET_NODE": self.op.target_node,
5658 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5659 }
5660 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5661 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5662 self.op.target_node]
5663 return env, nl, nl
5665 def CheckPrereq(self):
5666 """Check prerequisites.
5668 This checks that the instance is in the cluster.
5671 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5672 assert self.instance is not None, \
5673 "Cannot retrieve locked instance %s" % self.op.instance_name
5675 node = self.cfg.GetNodeInfo(self.op.target_node)
5676 assert node is not None, \
5677 "Cannot retrieve locked node %s" % self.op.target_node
5679 self.target_node = target_node = node.name
5681 if target_node == instance.primary_node:
5682 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5683 (instance.name, target_node),
5684 errors.ECODE_STATE)
5686 bep = self.cfg.GetClusterInfo().FillBE(instance)
5688 for idx, dsk in enumerate(instance.disks):
5689 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5690 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5691 " cannot copy" % idx, errors.ECODE_STATE)
5693 _CheckNodeOnline(self, target_node)
5694 _CheckNodeNotDrained(self, target_node)
5696 if instance.admin_up:
5697 # check memory requirements on the secondary node
5698 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5699 instance.name, bep[constants.BE_MEMORY],
5700 instance.hypervisor)
5701 else:
5702 self.LogInfo("Not checking memory on the secondary node as"
5703 " instance will not be started")
5705 # check bridge existence
5706 _CheckInstanceBridgesExist(self, instance, node=target_node)
5708 def Exec(self, feedback_fn):
5709 """Move an instance.
5711 The move is done by shutting it down on its present node, copying
5712 the data over (slow) and starting it on the new node.
5715 instance = self.instance
5717 source_node = instance.primary_node
5718 target_node = self.target_node
5720 self.LogInfo("Shutting down instance %s on source node %s",
5721 instance.name, source_node)
5723 result = self.rpc.call_instance_shutdown(source_node, instance,
5724 self.op.shutdown_timeout)
5725 msg = result.fail_msg
5726 if msg:
5727 if self.op.ignore_consistency:
5728 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5729 " Proceeding anyway. Please make sure node"
5730 " %s is down. Error details: %s",
5731 instance.name, source_node, source_node, msg)
5732 else:
5733 raise errors.OpExecError("Could not shutdown instance %s on"
5734 " node %s: %s" %
5735 (instance.name, source_node, msg))
5737 # create the target disks
5738 try:
5739 _CreateDisks(self, instance, target_node=target_node)
5740 except errors.OpExecError:
5741 self.LogWarning("Device creation failed, reverting...")
5742 try:
5743 _RemoveDisks(self, instance, target_node=target_node)
5744 finally:
5745 self.cfg.ReleaseDRBDMinors(instance.name)
5746 raise
5748 cluster_name = self.cfg.GetClusterInfo().cluster_name
5750 errs = []
5751 # activate, get path, copy the data over
5752 for idx, disk in enumerate(instance.disks):
5753 self.LogInfo("Copying data for disk %d", idx)
5754 result = self.rpc.call_blockdev_assemble(target_node, disk,
5755 instance.name, True)
5756 if result.fail_msg:
5757 self.LogWarning("Can't assemble newly created disk %d: %s",
5758 idx, result.fail_msg)
5759 errs.append(result.fail_msg)
5760 break
5761 dev_path = result.payload
5762 result = self.rpc.call_blockdev_export(source_node, disk,
5763 target_node, dev_path,
5764 cluster_name)
5765 if result.fail_msg:
5766 self.LogWarning("Can't copy data over for disk %d: %s",
5767 idx, result.fail_msg)
5768 errs.append(result.fail_msg)
5769 break
5771 if errs:
5772 self.LogWarning("Some disks failed to copy, aborting")
5773 try:
5774 _RemoveDisks(self, instance, target_node=target_node)
5775 finally:
5776 self.cfg.ReleaseDRBDMinors(instance.name)
5777 raise errors.OpExecError("Errors during disk copy: %s" %
5778 (",".join(errs),))
5780 instance.primary_node = target_node
5781 self.cfg.Update(instance, feedback_fn)
5783 self.LogInfo("Removing the disks on the original node")
5784 _RemoveDisks(self, instance, target_node=source_node)
5786 # Only start the instance if it's marked as up
5787 if instance.admin_up:
5788 self.LogInfo("Starting instance %s on node %s",
5789 instance.name, target_node)
5791 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5792 ignore_secondaries=True)
5793 if not disks_ok:
5794 _ShutdownInstanceDisks(self, instance)
5795 raise errors.OpExecError("Can't activate the instance's disks")
5797 result = self.rpc.call_instance_start(target_node, instance, None, None)
5798 msg = result.fail_msg
5799 if msg:
5800 _ShutdownInstanceDisks(self, instance)
5801 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5802 (instance.name, target_node, msg))
5805 class LUMigrateNode(LogicalUnit):
5806 """Migrate all instances from a node.
5809 HPATH = "node-migrate"
5810 HTYPE = constants.HTYPE_NODE
5818 def ExpandNames(self):
5819 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5821 self.needed_locks = {
5822 locking.LEVEL_NODE: [self.op.node_name],
5823 }
5825 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5827 # Create tasklets for migrating instances for all instances on this node
5828 names = []
5829 tasklets = []
5831 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5832 logging.debug("Migrating instance %s", inst.name)
5833 names.append(inst.name)
5835 tasklets.append(TLMigrateInstance(self, inst.name, False))
5837 self.tasklets = tasklets
5839 # Declare instance locks
5840 self.needed_locks[locking.LEVEL_INSTANCE] = names
5842 def DeclareLocks(self, level):
5843 if level == locking.LEVEL_NODE:
5844 self._LockInstancesNodes()
5846 def BuildHooksEnv(self):
5847 """Build hooks env.
5849 This runs on the master, the primary and all the secondaries.
5851 """
5852 env = {
5853 "NODE_NAME": self.op.node_name,
5854 }
5856 nl = [self.cfg.GetMasterNode()]
5858 return (env, nl, nl)
5861 class TLMigrateInstance(Tasklet):
5862 """Tasklet class for instance migration.
5865 @ivar live: whether the migration will be done live or non-live;
5866 this variable is initialized only after CheckPrereq has run
5868 """
5869 def __init__(self, lu, instance_name, cleanup):
5870 """Initializes this class.
5873 Tasklet.__init__(self, lu)
5876 self.instance_name = instance_name
5877 self.cleanup = cleanup
5878 self.live = False # will be overridden later
5880 def CheckPrereq(self):
5881 """Check prerequisites.
5883 This checks that the instance is in the cluster.
5886 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5887 instance = self.cfg.GetInstanceInfo(instance_name)
5888 assert instance is not None
5890 if instance.disk_template != constants.DT_DRBD8:
5891 raise errors.OpPrereqError("Instance's disk layout is not"
5892 " drbd8, cannot migrate.", errors.ECODE_STATE)
5894 secondary_nodes = instance.secondary_nodes
5895 if not secondary_nodes:
5896 raise errors.ConfigurationError("No secondary node but using"
5897 " drbd8 disk template")
5899 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5901 target_node = secondary_nodes[0]
5902 # check memory requirements on the secondary node
5903 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5904 instance.name, i_be[constants.BE_MEMORY],
5905 instance.hypervisor)
5907    # check bridge existence
5908 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5910 if not self.cleanup:
5911 _CheckNodeNotDrained(self.lu, target_node)
5912      result = self.rpc.call_instance_migratable(instance.primary_node,
5913                                                 instance)
5914 result.Raise("Can't migrate, please use failover",
5915 prereq=True, ecode=errors.ECODE_STATE)
5917 self.instance = instance
5919    if self.lu.op.live is not None and self.lu.op.mode is not None:
5920      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5921                                 " parameters is accepted",
5922                                 errors.ECODE_INVAL)
5923    if self.lu.op.live is not None:
5924      if self.lu.op.live:
5925        self.lu.op.mode = constants.HT_MIGRATION_LIVE
5926      else:
5927        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5928 # reset the 'live' parameter to None so that repeated
5929 # invocations of CheckPrereq do not raise an exception
5930 self.lu.op.live = None
5931 elif self.lu.op.mode is None:
5932 # read the default value from the hypervisor
5933 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5934 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5936 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
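  # Illustrative sketch of the precedence implemented above: 'live' and
  # 'mode' are mutually exclusive, an explicit 'live' flag maps onto the
  # mode constants, and with neither given the hypervisor default wins.
  # The helper below is hypothetical, with plain strings standing in for
  # the HT_MIGRATION_* constants:
  #
  #   def resolve_migration_mode(live, mode, hv_default):
  #     if live is not None and mode is not None:
  #       raise ValueError("only one of 'live' and 'mode' may be given")
  #     if live is not None:
  #       return "live" if live else "non-live"
  #     return mode if mode is not None else hv_default
  #
  #   resolve_migration_mode(None, None, "non-live") -> "non-live"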
5938 def _WaitUntilSync(self):
5939 """Poll with custom rpc for disk sync.
5941 This uses our own step-based rpc call.
5944    self.feedback_fn("* wait until resync is done")
5945    all_done = False
5946    while not all_done:
5947      all_done = True
5948      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5949                                            self.nodes_ip,
5950                                            self.instance.disks)
5951      min_percent = 100
5952      for node, nres in result.items():
5953        nres.Raise("Cannot resync disks on node %s" % node)
5954        node_done, node_percent = nres.payload
5955        all_done = all_done and node_done
5956        if node_percent is not None:
5957          min_percent = min(min_percent, node_percent)
5958      if not all_done:
5959        if min_percent < 100:
5960          self.feedback_fn(" - progress: %.1f%%" % min_percent)
5961        time.sleep(2)
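  # Worked example for the aggregation above (hypothetical payloads): if
  # node A reports (done=False, percent=80.0) and node B reports
  # (done=True, percent=None), then all_done stays False, min_percent
  # becomes 80.0, " - progress: 80.0%" is printed and the loop polls again.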
5963 def _EnsureSecondary(self, node):
5964 """Demote a node to secondary.
5967 self.feedback_fn("* switching node %s to secondary mode" % node)
5969 for dev in self.instance.disks:
5970 self.cfg.SetDiskID(dev, node)
5972 result = self.rpc.call_blockdev_close(node, self.instance.name,
5973 self.instance.disks)
5974 result.Raise("Cannot change disk to secondary on node %s" % node)
5976 def _GoStandalone(self):
5977 """Disconnect from the network.
5980 self.feedback_fn("* changing into standalone mode")
5981 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5982 self.instance.disks)
5983 for node, nres in result.items():
5984      nres.Raise("Cannot disconnect disks from node %s" % node)
5986 def _GoReconnect(self, multimaster):
5987 """Reconnect to the network.
5990    if multimaster:
5991      msg = "dual-master"
5992    else:
5993      msg = "single-master"
5994 self.feedback_fn("* changing disks into %s mode" % msg)
5995 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5996 self.instance.disks,
5997 self.instance.name, multimaster)
5998 for node, nres in result.items():
5999 nres.Raise("Cannot change disks config on node %s" % node)
6001 def _ExecCleanup(self):
6002 """Try to cleanup after a failed migration.
6004 The cleanup is done by:
6005 - check that the instance is running only on one node
6006 (and update the config if needed)
6007 - change disks on its secondary node to secondary
6008 - wait until disks are fully synchronized
6009 - disconnect from the network
6010 - change disks into single-master mode
6011 - wait again until disks are fully synchronized
6014 instance = self.instance
6015 target_node = self.target_node
6016 source_node = self.source_node
6018 # check running on only one node
6019    self.feedback_fn("* checking where the instance actually runs"
6020                     " (if this hangs, the hypervisor might be in"
6021                     " a bad state)")
6022    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6023 for node, result in ins_l.items():
6024 result.Raise("Can't contact node %s" % node)
6026 runningon_source = instance.name in ins_l[source_node].payload
6027 runningon_target = instance.name in ins_l[target_node].payload
6029 if runningon_source and runningon_target:
6030 raise errors.OpExecError("Instance seems to be running on two nodes,"
6031 " or the hypervisor is confused. You will have"
6032 " to ensure manually that it runs only on one"
6033 " and restart this operation.")
6035 if not (runningon_source or runningon_target):
6036 raise errors.OpExecError("Instance does not seem to be running at all."
6037 " In this case, it's safer to repair by"
6038 " running 'gnt-instance stop' to ensure disk"
6039 " shutdown, and then restarting it.")
6041 if runningon_target:
6042 # the migration has actually succeeded, we need to update the config
6043 self.feedback_fn("* instance running on secondary node (%s),"
6044 " updating config" % target_node)
6045 instance.primary_node = target_node
6046 self.cfg.Update(instance, self.feedback_fn)
6047      demoted_node = source_node
6048    else:
6049      self.feedback_fn("* instance confirmed to be running on its"
6050 " primary node (%s)" % source_node)
6051 demoted_node = target_node
6053    self._EnsureSecondary(demoted_node)
6054    try:
6055      self._WaitUntilSync()
6056    except errors.OpExecError:
6057      # we ignore errors here, since if the device is standalone, it
6058      # won't be able to sync
6059      pass
6060    self._GoStandalone()
6061 self._GoReconnect(False)
6062 self._WaitUntilSync()
6064 self.feedback_fn("* done")
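  # Decision table for the "where does it run" check above:
  #   on source | on target | action
  #   yes       | yes       | abort: split state, must be fixed manually
  #   no        | no        | abort: instance is down, repair via stop/start
  #   no        | yes       | migration succeeded: make target the primary,
  #             |           | demote the source node
  #   yes       | no        | migration failed early: source stays primary,
  #             |           | demote the target node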
6066 def _RevertDiskStatus(self):
6067 """Try to revert the disk status after a failed migration.
6070    target_node = self.target_node
6071    try:
6072      self._EnsureSecondary(target_node)
6073      self._GoStandalone()
6074      self._GoReconnect(False)
6075      self._WaitUntilSync()
6076    except errors.OpExecError, err:
6077      self.lu.LogWarning("Migration failed and I can't reconnect the"
6078                         " drives: error '%s'\n"
6079                         "Please look and recover the instance status" %
6080                         str(err))
6082 def _AbortMigration(self):
6083 """Call the hypervisor code to abort a started migration.
6086 instance = self.instance
6087 target_node = self.target_node
6088 migration_info = self.migration_info
6090    abort_result = self.rpc.call_finalize_migration(target_node,
6091                                                    instance,
6092                                                    migration_info,
6093                                                    False)
6094    abort_msg = abort_result.fail_msg
6095    if abort_msg:
6096 logging.error("Aborting migration failed on target node %s: %s",
6097 target_node, abort_msg)
6098    # Don't raise an exception here, as we still have to try to revert the
6099    # disk status, even if this step failed.
6101 def _ExecMigration(self):
6102 """Migrate an instance.
6104 The migrate is done by:
6105 - change the disks into dual-master mode
6106 - wait until disks are fully synchronized again
6107 - migrate the instance
6108 - change disks on the new secondary node (the old primary) to secondary
6109 - wait until disks are fully synchronized
6110 - change disks into single-master mode
6113 instance = self.instance
6114 target_node = self.target_node
6115 source_node = self.source_node
6117 self.feedback_fn("* checking disk consistency between source and target")
6118 for dev in instance.disks:
6119 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6120 raise errors.OpExecError("Disk %s is degraded or not fully"
6121 " synchronized on target node,"
6122                                 " aborting migration." % dev.iv_name)
6124 # First get the migration information from the remote node
6125 result = self.rpc.call_migration_info(source_node, instance)
6126    msg = result.fail_msg
6127    if msg:
6128      log_err = ("Failed fetching source migration information from %s: %s" %
6129                 (source_node, msg))
6130 logging.error(log_err)
6131 raise errors.OpExecError(log_err)
6133 self.migration_info = migration_info = result.payload
6135 # Then switch the disks to master/master mode
6136 self._EnsureSecondary(target_node)
6137 self._GoStandalone()
6138 self._GoReconnect(True)
6139 self._WaitUntilSync()
6141 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6142    result = self.rpc.call_accept_instance(target_node,
6143                                           instance,
6144                                           migration_info,
6145                                           self.nodes_ip[target_node])
6147    msg = result.fail_msg
6148    if msg:
6149 logging.error("Instance pre-migration failed, trying to revert"
6150 " disk status: %s", msg)
6151 self.feedback_fn("Pre-migration failed, aborting")
6152 self._AbortMigration()
6153 self._RevertDiskStatus()
6154 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6155 (instance.name, msg))
6157 self.feedback_fn("* migrating instance to %s" % target_node)
6159    result = self.rpc.call_instance_migrate(source_node, instance,
6160                                            self.nodes_ip[target_node],
6161                                            self.live)
6162    msg = result.fail_msg
6163    if msg:
6164 logging.error("Instance migration failed, trying to revert"
6165 " disk status: %s", msg)
6166 self.feedback_fn("Migration failed, aborting")
6167 self._AbortMigration()
6168 self._RevertDiskStatus()
6169 raise errors.OpExecError("Could not migrate instance %s: %s" %
6170 (instance.name, msg))
6173 instance.primary_node = target_node
6174 # distribute new instance config to the other nodes
6175 self.cfg.Update(instance, self.feedback_fn)
6177    result = self.rpc.call_finalize_migration(target_node,
6178                                              instance,
6179                                              migration_info,
6180                                              True)
6181    msg = result.fail_msg
6182    if msg:
6183      logging.error("Instance migration succeeded, but finalization failed:"
6184                    " %s", msg)
6185      raise errors.OpExecError("Could not finalize instance migration: %s" %
6186                               msg)
6188 self._EnsureSecondary(source_node)
6189 self._WaitUntilSync()
6190 self._GoStandalone()
6191 self._GoReconnect(False)
6192 self._WaitUntilSync()
6194 self.feedback_fn("* done")
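  # Compressed view of the DRBD transitions driven above (P = source/
  # current primary, S = target/secondary):
  #   1. close devices on S                  (_EnsureSecondary)
  #   2. disconnect both sides               (_GoStandalone)
  #   3. reconnect dual-master and resync    (_GoReconnect(True))
  #   4. live-migrate the instance P -> S, finalize on S
  #   5. close devices on the old primary    (_EnsureSecondary(source_node))
  #   6. resync, disconnect, reconnect single-master, final resync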
6196 def Exec(self, feedback_fn):
6197 """Perform the migration.
6200 feedback_fn("Migrating instance %s" % self.instance.name)
6202 self.feedback_fn = feedback_fn
6204 self.source_node = self.instance.primary_node
6205 self.target_node = self.instance.secondary_nodes[0]
6206    self.all_nodes = [self.source_node, self.target_node]
6207    self.nodes_ip = {
6208      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6209      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6210      }
6212    if self.cleanup:
6213      return self._ExecCleanup()
6214    else:
6215      return self._ExecMigration()
6218 def _CreateBlockDev(lu, node, instance, device, force_create,
6219                     info, force_open):
6220 """Create a tree of block devices on a given node.
6222 If this device type has to be created on secondaries, create it and
6225 If not, just recurse to children keeping the same 'force' value.
6227 @param lu: the lu on whose behalf we execute
6228 @param node: the node on which to create the device
6229 @type instance: L{objects.Instance}
6230 @param instance: the instance which owns the device
6231 @type device: L{objects.Disk}
6232 @param device: the device to create
6233 @type force_create: boolean
6234  @param force_create: whether to force creation of this device; this
6235    will be changed to True whenever we find a device which has the
6236    CreateOnSecondary() attribute
6237 @param info: the extra 'metadata' we should attach to the device
6238 (this will be represented as a LVM tag)
6239 @type force_open: boolean
6240  @param force_open: this parameter will be passed to the
6241    L{backend.BlockdevCreate} function where it specifies
6242    whether we run on primary or not, and it affects both
6243    the child assembly and the device's own Open() execution
6246  if device.CreateOnSecondary():
6247    force_create = True
6249  if device.children:
6250    for child in device.children:
6251      _CreateBlockDev(lu, node, instance, child, force_create,
6252                      info, force_open)
6254  if not force_create:
6255    return
6257  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6260 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6261 """Create a single block device on a given node.
6263  This will not recurse over children of the device, so they must be
6264  created in advance.
6266 @param lu: the lu on whose behalf we execute
6267 @param node: the node on which to create the device
6268 @type instance: L{objects.Instance}
6269 @param instance: the instance which owns the device
6270 @type device: L{objects.Disk}
6271 @param device: the device to create
6272 @param info: the extra 'metadata' we should attach to the device
6273 (this will be represented as a LVM tag)
6274 @type force_open: boolean
6275  @param force_open: this parameter will be passed to the
6276    L{backend.BlockdevCreate} function where it specifies
6277    whether we run on primary or not, and it affects both
6278    the child assembly and the device's own Open() execution
6281 lu.cfg.SetDiskID(device, node)
6282 result = lu.rpc.call_blockdev_create(node, device, device.size,
6283 instance.name, force_open, info)
6284 result.Raise("Can't create block device %s on"
6285 " node %s for instance %s" % (device, node, instance.name))
6286 if device.physical_id is None:
6287 device.physical_id = result.payload
6290 def _GenerateUniqueNames(lu, exts):
6291  """Generate suitable LV names.
6293  This will generate one unique logical volume name for each given extension.
6296  results = []
6297  for val in exts:
6298    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6299    results.append("%s%s" % (new_id, val))
6300  return results
6303 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6304                          p_minor, s_minor):
6305 """Generate a drbd8 device complete with its children.
6308 port = lu.cfg.AllocatePort()
6309 vgname = lu.cfg.GetVGName()
6310 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6311 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6312 logical_id=(vgname, names[0]))
6313 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6314 logical_id=(vgname, names[1]))
6315  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6316                          logical_id=(primary, secondary, port,
6317                                      p_minor, s_minor,
6318                                      shared_secret),
6319                          children=[dev_data, dev_meta],
6320                          iv_name=iv_name)
6321  return drbd_dev
6324 def _GenerateDiskTemplate(lu, template_name,
6325 instance_name, primary_node,
6326 secondary_nodes, disk_info,
6327                           file_storage_dir, file_driver,
6328                           base_index):
6329 """Generate the entire disk layout for a given template type.
6332 #TODO: compute space requirements
6334 vgname = lu.cfg.GetVGName()
6335 disk_count = len(disk_info)
6336  disks = []
6337  if template_name == constants.DT_DISKLESS:
6338    pass
6339 elif template_name == constants.DT_PLAIN:
6340 if len(secondary_nodes) != 0:
6341 raise errors.ProgrammerError("Wrong template configuration")
6343 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6344 for i in range(disk_count)])
6345 for idx, disk in enumerate(disk_info):
6346 disk_index = idx + base_index
6347 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6348 logical_id=(vgname, names[idx]),
6349                              iv_name="disk/%d" % disk_index,
6350                              mode=disk["mode"])
6351 disks.append(disk_dev)
6352 elif template_name == constants.DT_DRBD8:
6353 if len(secondary_nodes) != 1:
6354 raise errors.ProgrammerError("Wrong template configuration")
6355 remote_node = secondary_nodes[0]
6356 minors = lu.cfg.AllocateDRBDMinor(
6357 [primary_node, remote_node] * len(disk_info), instance_name)
6359    names = []
6360    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6361 for i in range(disk_count)]):
6362 names.append(lv_prefix + "_data")
6363 names.append(lv_prefix + "_meta")
6364 for idx, disk in enumerate(disk_info):
6365 disk_index = idx + base_index
6366 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6367 disk["size"], names[idx*2:idx*2+2],
6368 "disk/%d" % disk_index,
6369 minors[idx*2], minors[idx*2+1])
6370 disk_dev.mode = disk["mode"]
6371 disks.append(disk_dev)
6372 elif template_name == constants.DT_FILE:
6373 if len(secondary_nodes) != 0:
6374 raise errors.ProgrammerError("Wrong template configuration")
6376 _RequireFileStorage()
6378 for idx, disk in enumerate(disk_info):
6379 disk_index = idx + base_index
6380 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6381 iv_name="disk/%d" % disk_index,
6382 logical_id=(file_driver,
6383                                          "%s/disk%d" % (file_storage_dir,
6384                                                         disk_index)),
6385                              mode=disk["mode"])
6386      disks.append(disk_dev)
6387  else:
6388    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6390  return disks
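# Illustrative example (hypothetical UUIDs): a two-disk DT_DRBD8 request
# with base_index 0 allocates minors for [pnode, snode, pnode, snode] and
# generates the LV pairs "<uuid0>.disk0_data"/"<uuid0>.disk0_meta" and
# "<uuid1>.disk1_data"/"<uuid1>.disk1_meta", exported as iv_names
# "disk/0" and "disk/1".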
6392 def _GetInstanceInfoText(instance):
6393  """Compute the text that should be added to the disk's metadata.
6396 return "originstname+%s" % instance.name
6399 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6400 """Create all disks for an instance.
6402 This abstracts away some work from AddInstance.
6404 @type lu: L{LogicalUnit}
6405 @param lu: the logical unit on whose behalf we execute
6406 @type instance: L{objects.Instance}
6407 @param instance: the instance whose disks we should create
6409 @param to_skip: list of indices to skip
6410 @type target_node: string
6411 @param target_node: if passed, overrides the target node for creation
6413 @return: the success of the creation
6416 info = _GetInstanceInfoText(instance)
6417 if target_node is None:
6418 pnode = instance.primary_node
6419    all_nodes = instance.all_nodes
6420  else:
6421    pnode = target_node
6422    all_nodes = [pnode]
6424 if instance.disk_template == constants.DT_FILE:
6425 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6426 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6428 result.Raise("Failed to create directory '%s' on"
6429 " node %s" % (file_storage_dir, pnode))
6431 # Note: this needs to be kept in sync with adding of disks in
6432 # LUSetInstanceParams
6433 for idx, device in enumerate(instance.disks):
6434    if to_skip and idx in to_skip:
6435      continue
6436 logging.info("Creating volume %s for instance %s",
6437 device.iv_name, instance.name)
6439 for node in all_nodes:
6440 f_create = node == pnode
6441 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
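# Note on the creation loop above: f_create and force_open are True only
# on the primary node; on the other nodes creation is forced only once a
# device in the tree reports CreateOnSecondary(), per _CreateBlockDev.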
6444 def _RemoveDisks(lu, instance, target_node=None):
6445 """Remove all disks for an instance.
6447 This abstracts away some work from `AddInstance()` and
6448 `RemoveInstance()`. Note that in case some of the devices couldn't
6449 be removed, the removal will continue with the other ones (compare
6450 with `_CreateDisks()`).
6452 @type lu: L{LogicalUnit}
6453 @param lu: the logical unit on whose behalf we execute
6454 @type instance: L{objects.Instance}
6455 @param instance: the instance whose disks we should remove
6456 @type target_node: string
6457 @param target_node: used to override the node on which to remove the disks
6459 @return: the success of the removal
6462 logging.info("Removing block devices for instance %s", instance.name)
6464  all_result = True
6465  for device in instance.disks:
6466    if target_node:
6467      edata = [(target_node, device)]
6468    else:
6469      edata = device.ComputeNodeTree(instance.primary_node)
6470    for node, disk in edata:
6471      lu.cfg.SetDiskID(disk, node)
6472      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6473      if msg:
6474        lu.LogWarning("Could not remove block device %s on node %s,"
6475                      " continuing anyway: %s", device.iv_name, node, msg)
6476        all_result = False
6478 if instance.disk_template == constants.DT_FILE:
6479    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6480    if target_node:
6481      tgt = target_node
6482    else:
6483      tgt = instance.primary_node
6484    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6485    if result.fail_msg:
6486      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6487                    file_storage_dir, instance.primary_node, result.fail_msg)
6488      all_result = False
6490  return all_result
6493 def _ComputeDiskSize(disk_template, disks):
6494 """Compute disk size requirements in the volume group
6497 # Required free disk space as a function of disk and swap space
6498  req_size_dict = {
6499    constants.DT_DISKLESS: None,
6500 constants.DT_PLAIN: sum(d["size"] for d in disks),
6501 # 128 MB are added for drbd metadata for each disk
6502 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6503    constants.DT_FILE: None,
6504  }
6506 if disk_template not in req_size_dict:
6507 raise errors.ProgrammerError("Disk template '%s' size requirement"
6508 " is unknown" % disk_template)
6510 return req_size_dict[disk_template]
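# Worked example (sizes in MiB): for disks [{"size": 1024}, {"size": 2048}]
# this yields 3072 for DT_PLAIN and (1024 + 128) + (2048 + 128) = 3328 for
# DT_DRBD8 (128 MB of metadata per disk), while DT_DISKLESS and DT_FILE
# return None since they use no volume group space.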
6513 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6514 """Hypervisor parameter validation.
6516  This function abstracts the hypervisor parameter validation to be
6517 used in both instance create and instance modify.
6519 @type lu: L{LogicalUnit}
6520 @param lu: the logical unit for which we check
6521 @type nodenames: list
6522 @param nodenames: the list of nodes on which we should check
6523 @type hvname: string
6524 @param hvname: the name of the hypervisor we should use
6525 @type hvparams: dict
6526 @param hvparams: the parameters which we need to check
6527 @raise errors.OpPrereqError: if the parameters are not valid
6530  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6531                                                  hvname,
6532                                                  hvparams)
6533  for node in nodenames:
6534    info = hvinfo[node]
6535    if info.offline:
6536      continue
6537    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6540 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6541 """OS parameters validation.
6543 @type lu: L{LogicalUnit}
6544 @param lu: the logical unit for which we check
6545 @type required: boolean
6546  @param required: whether the validation should fail if the OS is not
6547    found
6548 @type nodenames: list
6549 @param nodenames: the list of nodes on which we should check
6550 @type osname: string
6551  @param osname: the name of the OS we should use
6552 @type osparams: dict
6553 @param osparams: the parameters which we need to check
6554 @raise errors.OpPrereqError: if the parameters are not valid
6557 result = lu.rpc.call_os_validate(required, nodenames, osname,
6558                                   [constants.OS_VALIDATE_PARAMETERS],
6559                                   osparams)
6560 for node, nres in result.items():
6561 # we don't check for offline cases since this should be run only
6562 # against the master node and/or an instance's nodes
6563 nres.Raise("OS Parameters validation failed on node %s" % node)
6564 if not nres.payload:
6565      lu.LogInfo("OS %s not found on node %s, validation skipped",
6566                 osname, node)
6569 class LUCreateInstance(LogicalUnit):
6570 """Create an instance.
6573 HPATH = "instance-add"
6574 HTYPE = constants.HTYPE_INSTANCE
6577 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6578 ("start", True, _TBool),
6579 ("wait_for_sync", True, _TBool),
6580 ("ip_check", True, _TBool),
6581 ("name_check", True, _TBool),
6582 ("disks", _NoDefault, _TListOf(_TDict)),
6583 ("nics", _NoDefault, _TListOf(_TDict)),
6584 ("hvparams", _EmptyDict, _TDict),
6585 ("beparams", _EmptyDict, _TDict),
6586 ("osparams", _EmptyDict, _TDict),
6587 ("no_install", None, _TMaybeBool),
6588 ("os_type", None, _TMaybeString),
6589 ("force_variant", False, _TBool),
6590 ("source_handshake", None, _TOr(_TList, _TNone)),
6591 ("source_x509_ca", None, _TMaybeString),
6592 ("source_instance_name", None, _TMaybeString),
6593 ("src_node", None, _TMaybeString),
6594 ("src_path", None, _TMaybeString),
6595 ("pnode", None, _TMaybeString),
6596 ("snode", None, _TMaybeString),
6597 ("iallocator", None, _TMaybeString),
6598 ("hypervisor", None, _TMaybeString),
6599 ("disk_template", _NoDefault, _CheckDiskTemplate),
6600 ("identify_defaults", False, _TBool),
6601 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6602 ("file_storage_dir", None, _TMaybeString),
6606 def CheckArguments(self):
6610 # do not require name_check to ease forward/backward compatibility
6612 if self.op.no_install and self.op.start:
6613 self.LogInfo("No-installation mode selected, disabling startup")
6614 self.op.start = False
6615 # validate/normalize the instance name
6616 self.op.instance_name = \
6617 netutils.Hostname.GetNormalizedName(self.op.instance_name)
6619 if self.op.ip_check and not self.op.name_check:
6620 # TODO: make the ip check more flexible and not depend on the name check
6621 raise errors.OpPrereqError("Cannot do ip check without a name check",
6624 # check nics' parameter names
6625 for nic in self.op.nics:
6626 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6628    # check disks: parameter names and consistent adopt/no-adopt strategy
6629 has_adopt = has_no_adopt = False
6630 for disk in self.op.disks:
6631      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6632      if "adopt" in disk:
6633        has_adopt = True
6634      else:
6635        has_no_adopt = True
6636 if has_adopt and has_no_adopt:
6637      raise errors.OpPrereqError("Either all disks are adopted or none is",
6638                                 errors.ECODE_INVAL)
6639    if has_adopt:
6640 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6641 raise errors.OpPrereqError("Disk adoption is not supported for the"
6642 " '%s' disk template" %
6643 self.op.disk_template,
6645 if self.op.iallocator is not None:
6646 raise errors.OpPrereqError("Disk adoption not allowed with an"
6647 " iallocator script", errors.ECODE_INVAL)
6648 if self.op.mode == constants.INSTANCE_IMPORT:
6649 raise errors.OpPrereqError("Disk adoption not allowed for"
6650 " instance import", errors.ECODE_INVAL)
6652 self.adopt_disks = has_adopt
6654 # instance name verification
6655 if self.op.name_check:
6656 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6657 self.op.instance_name = self.hostname1.name
6658 # used in CheckPrereq for ip ping check
6659 self.check_ip = self.hostname1.ip
6660 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6661      raise errors.OpPrereqError("Remote imports require names to be checked",
6662                                 errors.ECODE_INVAL)
6663    else:
6664 self.check_ip = None
6666 # file storage checks
6667 if (self.op.file_driver and
6668 not self.op.file_driver in constants.FILE_DRIVER):
6669 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6670 self.op.file_driver, errors.ECODE_INVAL)
6672 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6673 raise errors.OpPrereqError("File storage directory path not absolute",
6676 ### Node/iallocator related checks
6677 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6679 if self.op.pnode is not None:
6680 if self.op.disk_template in constants.DTS_NET_MIRROR:
6681 if self.op.snode is None:
6682 raise errors.OpPrereqError("The networked disk templates need"
6683 " a mirror node", errors.ECODE_INVAL)
6685 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6687 self.op.snode = None
6689 self._cds = _GetClusterDomainSecret()
6691 if self.op.mode == constants.INSTANCE_IMPORT:
6692 # On import force_variant must be True, because if we forced it at
6693 # initial install, our only chance when importing it back is that it
6694      # works again!
6695      self.op.force_variant = True
6697 if self.op.no_install:
6698 self.LogInfo("No-installation mode has no effect during import")
6700 elif self.op.mode == constants.INSTANCE_CREATE:
6701 if self.op.os_type is None:
6702 raise errors.OpPrereqError("No guest OS specified",
6704 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6705 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6706 " installation" % self.op.os_type,
6708 if self.op.disk_template is None:
6709 raise errors.OpPrereqError("No disk template specified",
6712 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6713 # Check handshake to ensure both clusters have the same domain secret
6714 src_handshake = self.op.source_handshake
6715 if not src_handshake:
6716 raise errors.OpPrereqError("Missing source handshake",
6719      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6720                                                           src_handshake)
6721      if errmsg:
6722 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6725 # Load and check source CA
6726 self.source_x509_ca_pem = self.op.source_x509_ca
6727 if not self.source_x509_ca_pem:
6728 raise errors.OpPrereqError("Missing source X509 CA",
6729                                   errors.ECODE_INVAL)
6731      try:
6732        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6733                                                    self._cds)
6734 except OpenSSL.crypto.Error, err:
6735 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6736 (err, ), errors.ECODE_INVAL)
6738 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6739 if errcode is not None:
6740 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6743 self.source_x509_ca = cert
6745 src_instance_name = self.op.source_instance_name
6746 if not src_instance_name:
6747 raise errors.OpPrereqError("Missing source instance name",
6750 self.source_instance_name = \
6751 netutils.GetHostname(name=src_instance_name).name
6753    else:
6754      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6755 self.op.mode, errors.ECODE_INVAL)
6757 def ExpandNames(self):
6758 """ExpandNames for CreateInstance.
6760 Figure out the right locks for instance creation.
6763 self.needed_locks = {}
6765 instance_name = self.op.instance_name
6766 # this is just a preventive check, but someone might still add this
6767 # instance in the meantime, and creation will fail at lock-add time
6768 if instance_name in self.cfg.GetInstanceList():
6769 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6770 instance_name, errors.ECODE_EXISTS)
6772 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6774 if self.op.iallocator:
6775 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6777 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6778 nodelist = [self.op.pnode]
6779 if self.op.snode is not None:
6780 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6781 nodelist.append(self.op.snode)
6782 self.needed_locks[locking.LEVEL_NODE] = nodelist
6784 # in case of import lock the source node too
6785 if self.op.mode == constants.INSTANCE_IMPORT:
6786 src_node = self.op.src_node
6787 src_path = self.op.src_path
6789 if src_path is None:
6790 self.op.src_path = src_path = self.op.instance_name
6792 if src_node is None:
6793 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6794 self.op.src_node = None
6795 if os.path.isabs(src_path):
6796 raise errors.OpPrereqError("Importing an instance from an absolute"
6797 " path requires a source node option.",
6798                                     errors.ECODE_INVAL)
6799      else:
6800        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6801 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6802 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6803 if not os.path.isabs(src_path):
6804 self.op.src_path = src_path = \
6805 utils.PathJoin(constants.EXPORT_DIR, src_path)
6807 def _RunAllocator(self):
6808 """Run the allocator based on input opcode.
6811 nics = [n.ToDict() for n in self.nics]
6812 ial = IAllocator(self.cfg, self.rpc,
6813 mode=constants.IALLOCATOR_MODE_ALLOC,
6814 name=self.op.instance_name,
6815 disk_template=self.op.disk_template,
6816                     tags=[],
6817                     os=self.op.os_type,
6818                     vcpus=self.be_full[constants.BE_VCPUS],
6819                     mem_size=self.be_full[constants.BE_MEMORY],
6820                     disks=self.disks,
6821                     nics=nics,
6822                     hypervisor=self.op.hypervisor,
6823                     )
6825 ial.Run(self.op.iallocator)
6827    if not ial.success:
6828      raise errors.OpPrereqError("Can't compute nodes using"
6829 " iallocator '%s': %s" %
6830 (self.op.iallocator, ial.info),
6832 if len(ial.result) != ial.required_nodes:
6833 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6834 " of nodes (%s), required %s" %
6835 (self.op.iallocator, len(ial.result),
6836 ial.required_nodes), errors.ECODE_FAULT)
6837 self.op.pnode = ial.result[0]
6838 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6839 self.op.instance_name, self.op.iallocator,
6840 utils.CommaJoin(ial.result))
6841 if ial.required_nodes == 2:
6842 self.op.snode = ial.result[1]
6844 def BuildHooksEnv(self):
6847 This runs on master, primary and secondary nodes of the instance.
6850    env = {
6851      "ADD_MODE": self.op.mode,
6852      }
6853 if self.op.mode == constants.INSTANCE_IMPORT:
6854 env["SRC_NODE"] = self.op.src_node
6855 env["SRC_PATH"] = self.op.src_path
6856 env["SRC_IMAGES"] = self.src_images
6858 env.update(_BuildInstanceHookEnv(
6859 name=self.op.instance_name,
6860 primary_node=self.op.pnode,
6861 secondary_nodes=self.secondaries,
6862 status=self.op.start,
6863 os_type=self.op.os_type,
6864 memory=self.be_full[constants.BE_MEMORY],
6865 vcpus=self.be_full[constants.BE_VCPUS],
6866 nics=_NICListToTuple(self, self.nics),
6867 disk_template=self.op.disk_template,
6868 disks=[(d["size"], d["mode"]) for d in self.disks],
6869      bep=self.be_full,
6870      hvp=self.hv_full,
6871      hypervisor_name=self.op.hypervisor,
6872    ))
6874    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6875          self.secondaries)
6876    return env, nl, nl
6878 def _ReadExportInfo(self):
6879 """Reads the export information from disk.
6881 It will override the opcode source node and path with the actual
6882 information, if these two were not specified before.
6884 @return: the export information
6887 assert self.op.mode == constants.INSTANCE_IMPORT
6889 src_node = self.op.src_node
6890 src_path = self.op.src_path
6892 if src_node is None:
6893 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6894 exp_list = self.rpc.call_export_list(locked_nodes)
6895      found = False
6896      for node in exp_list:
6897        if exp_list[node].fail_msg:
6898          continue
6899        if src_path in exp_list[node].payload:
6900          found = True
6901          self.op.src_node = src_node = node
6902          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6903                                                       src_path)
6904          break
6905      if not found:
6906 raise errors.OpPrereqError("No export found for relative path %s" %
6907 src_path, errors.ECODE_INVAL)
6909 _CheckNodeOnline(self, src_node)
6910 result = self.rpc.call_export_info(src_node, src_path)
6911 result.Raise("No export or invalid export found in dir %s" % src_path)
6913 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6914 if not export_info.has_section(constants.INISECT_EXP):
6915 raise errors.ProgrammerError("Corrupted export config",
6916 errors.ECODE_ENVIRON)
6918 ei_version = export_info.get(constants.INISECT_EXP, "version")
6919 if (int(ei_version) != constants.EXPORT_VERSION):
6920 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6921 (ei_version, constants.EXPORT_VERSION),
6922                                 errors.ECODE_ENVIRON)
6924    return export_info
6925 def _ReadExportParams(self, einfo):
6926 """Use export parameters as defaults.
6928 In case the opcode doesn't specify (as in override) some instance
6929 parameters, then try to use them from the export information, if
6932    if self.op.os_type is None:
6933      self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6935 if self.op.disk_template is None:
6936 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6937        self.op.disk_template = einfo.get(constants.INISECT_INS,
6938                                          "disk_template")
6939      else:
6940        raise errors.OpPrereqError("No disk template specified and the export"
6941 " is missing the disk_template information",
6944 if not self.op.disks:
6945      if einfo.has_option(constants.INISECT_INS, "disk_count"):
6946        disks = []
6947        # TODO: import the disk iv_name too
6948 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6949 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6950 disks.append({"size": disk_sz})
6951        self.op.disks = disks
6952      else:
6953        raise errors.OpPrereqError("No disk info specified and the export"
6954 " is missing the disk information",
6957 if (not self.op.nics and
6958        einfo.has_option(constants.INISECT_INS, "nic_count")):
6959      nics = []
6960      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6961        ndict = {}
6962        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6963          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6964          ndict[name] = v
6965        nics.append(ndict)
6966      self.op.nics = nics
6968 if (self.op.hypervisor is None and
6969 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6970 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6971 if einfo.has_section(constants.INISECT_HYP):
6972 # use the export parameters but do not override the ones
6973 # specified by the user
6974 for name, value in einfo.items(constants.INISECT_HYP):
6975 if name not in self.op.hvparams:
6976 self.op.hvparams[name] = value
6978 if einfo.has_section(constants.INISECT_BEP):
6979 # use the parameters, without overriding
6980 for name, value in einfo.items(constants.INISECT_BEP):
6981 if name not in self.op.beparams:
6982 self.op.beparams[name] = value
6984 # try to read the parameters old style, from the main section
6985 for name in constants.BES_PARAMETERS:
6986 if (name not in self.op.beparams and
6987 einfo.has_option(constants.INISECT_INS, name)):
6988 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6990 if einfo.has_section(constants.INISECT_OSP):
6991 # use the parameters, without overriding
6992 for name, value in einfo.items(constants.INISECT_OSP):
6993 if name not in self.op.osparams:
6994 self.op.osparams[name] = value
6996 def _RevertToDefaults(self, cluster):
6997 """Revert the instance parameters to the default values.
7001 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7002 for name in self.op.hvparams.keys():
7003 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7004 del self.op.hvparams[name]
7006 be_defs = cluster.SimpleFillBE({})
7007 for name in self.op.beparams.keys():
7008 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7009 del self.op.beparams[name]
7011 nic_defs = cluster.SimpleFillNIC({})
7012 for nic in self.op.nics:
7013 for name in constants.NICS_PARAMETERS:
7014        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7015          del nic[name]
7017 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7018 for name in self.op.osparams.keys():
7019 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7020 del self.op.osparams[name]
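    # Net effect of identify_defaults: any value equal to the current
    # cluster default is dropped from the instance so it keeps tracking
    # future default changes instead of pinning today's value; e.g. a
    # hypothetical hvparams {"acpi": True} becomes {} if the cluster
    # default for "acpi" is already True.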
7022 def CheckPrereq(self):
7023 """Check prerequisites.
7026 if self.op.mode == constants.INSTANCE_IMPORT:
7027 export_info = self._ReadExportInfo()
7028 self._ReadExportParams(export_info)
7030 _CheckDiskTemplate(self.op.disk_template)
7032 if (not self.cfg.GetVGName() and
7033 self.op.disk_template not in constants.DTS_NOT_LVM):
7034 raise errors.OpPrereqError("Cluster does not support lvm-based"
7035 " instances", errors.ECODE_STATE)
7037 if self.op.hypervisor is None:
7038 self.op.hypervisor = self.cfg.GetHypervisorType()
7040 cluster = self.cfg.GetClusterInfo()
7041 enabled_hvs = cluster.enabled_hypervisors
7042 if self.op.hypervisor not in enabled_hvs:
7043 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7044 " cluster (%s)" % (self.op.hypervisor,
7045 ",".join(enabled_hvs)),
7048 # check hypervisor parameter syntax (locally)
7049 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7050 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7052 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7053 hv_type.CheckParameterSyntax(filled_hvp)
7054 self.hv_full = filled_hvp
7055 # check that we don't specify global parameters on an instance
7056 _CheckGlobalHvParams(self.op.hvparams)
7058 # fill and remember the beparams dict
7059 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7060 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7062 # build os parameters
7063 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7065 # now that hvp/bep are in final format, let's reset to defaults,
7067 if self.op.identify_defaults:
7068 self._RevertToDefaults(cluster)
7070    # NIC buildup
7071    self.nics = []
7072    for idx, nic in enumerate(self.op.nics):
7073 nic_mode_req = nic.get("mode", None)
7074 nic_mode = nic_mode_req
7075 if nic_mode is None:
7076 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7078 # in routed mode, for the first nic, the default ip is 'auto'
7079 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7080 default_ip_mode = constants.VALUE_AUTO
7081      else:
7082        default_ip_mode = constants.VALUE_NONE
7084 # ip validity checks
7085 ip = nic.get("ip", default_ip_mode)
7086      if ip is None or ip.lower() == constants.VALUE_NONE:
7087        nic_ip = None
7088 elif ip.lower() == constants.VALUE_AUTO:
7089 if not self.op.name_check:
7090 raise errors.OpPrereqError("IP address set to auto but name checks"
7091 " have been skipped",
7093        nic_ip = self.hostname1.ip
7094      else:
7095        if not netutils.IPAddress.IsValid(ip):
7096          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7097                                     errors.ECODE_INVAL)
7098        nic_ip = ip
7100 # TODO: check the ip address for uniqueness
7101 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7102 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7105 # MAC address verification
7106 mac = nic.get("mac", constants.VALUE_AUTO)
7107 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7108 mac = utils.NormalizeAndValidateMac(mac)
7110        try:
7111          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7112 except errors.ReservationError:
7113 raise errors.OpPrereqError("MAC address %s already in use"
7114 " in cluster" % mac,
7115 errors.ECODE_NOTUNIQUE)
7117 # bridge verification
7118 bridge = nic.get("bridge", None)
7119      link = nic.get("link", None)
7120      if bridge and link:
7121        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7122                                   " at the same time", errors.ECODE_INVAL)
7123      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7124        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7125                                   errors.ECODE_INVAL)
7126      elif bridge:
7127        link = bridge
7129      nicparams = {}
7130      if nic_mode_req:
7131        nicparams[constants.NIC_MODE] = nic_mode_req
7132      if link:
7133        nicparams[constants.NIC_LINK] = link
7135 check_params = cluster.SimpleFillNIC(nicparams)
7136 objects.NIC.CheckParameterSyntax(check_params)
7137 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7139    # disk checks/pre-build
7140    self.disks = []
7141 for disk in self.op.disks:
7142 mode = disk.get("mode", constants.DISK_RDWR)
7143 if mode not in constants.DISK_ACCESS_SET:
7144 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7145 mode, errors.ECODE_INVAL)
7146      size = disk.get("size", None)
7147      if size is None:
7148        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7149      try:
7150        size = int(size)
7151      except (TypeError, ValueError):
7152        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7153                                   errors.ECODE_INVAL)
7154      new_disk = {"size": size, "mode": mode}
7155      if "adopt" in disk:
7156        new_disk["adopt"] = disk["adopt"]
7157      self.disks.append(new_disk)
7159 if self.op.mode == constants.INSTANCE_IMPORT:
7161 # Check that the new instance doesn't have less disks than the export
7162 instance_disks = len(self.disks)
7163 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7164 if instance_disks < export_disks:
7165 raise errors.OpPrereqError("Not enough disks to import."
7166 " (instance: %d, export: %d)" %
7167                                   (instance_disks, export_disks),
7168                                   errors.ECODE_INVAL)
7170      disk_images = []
7171      for idx in range(export_disks):
7172 option = 'disk%d_dump' % idx
7173 if export_info.has_option(constants.INISECT_INS, option):
7174 # FIXME: are the old os-es, disk sizes, etc. useful?
7175 export_name = export_info.get(constants.INISECT_INS, option)
7176 image = utils.PathJoin(self.op.src_path, export_name)
7177 disk_images.append(image)
7178        else:
7179          disk_images.append(False)
7181 self.src_images = disk_images
7183 old_name = export_info.get(constants.INISECT_INS, 'name')
7184      try:
7185        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7186 except (TypeError, ValueError), err:
7187 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7188 " an integer: %s" % str(err),
7190 if self.op.instance_name == old_name:
7191 for idx, nic in enumerate(self.nics):
7192 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7193 nic_mac_ini = 'nic%d_mac' % idx
7194 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7196 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7198 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7199 if self.op.ip_check:
7200 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7201 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7202 (self.check_ip, self.op.instance_name),
7203 errors.ECODE_NOTUNIQUE)
7205 #### mac address generation
7206 # By generating here the mac address both the allocator and the hooks get
7207 # the real final mac address rather than the 'auto' or 'generate' value.
7208 # There is a race condition between the generation and the instance object
7209 # creation, which means that we know the mac is valid now, but we're not
7210 # sure it will be when we actually add the instance. If things go bad
7211 # adding the instance will abort because of a duplicate mac, and the
7212 # creation job will fail.
7213 for nic in self.nics:
7214 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7215 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7219 if self.op.iallocator is not None:
7220 self._RunAllocator()
7222 #### node related checks
7224 # check primary node
7225 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7226 assert self.pnode is not None, \
7227 "Cannot retrieve locked node %s" % self.op.pnode
7228    if pnode.offline:
7229      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7230                                 pnode.name, errors.ECODE_STATE)
7231    if pnode.drained:
7232      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7233 pnode.name, errors.ECODE_STATE)
7235 self.secondaries = []
7237 # mirror node verification
7238 if self.op.disk_template in constants.DTS_NET_MIRROR:
7239 if self.op.snode == pnode.name:
7240 raise errors.OpPrereqError("The secondary node cannot be the"
7241 " primary node.", errors.ECODE_INVAL)
7242 _CheckNodeOnline(self, self.op.snode)
7243 _CheckNodeNotDrained(self, self.op.snode)
7244 self.secondaries.append(self.op.snode)
7246 nodenames = [pnode.name] + self.secondaries
7248    req_size = _ComputeDiskSize(self.op.disk_template,
7249                                self.disks)
7251 # Check lv size requirements, if not adopting
7252 if req_size is not None and not self.adopt_disks:
7253 _CheckNodesFreeDisk(self, nodenames, req_size)
7255 if self.adopt_disks: # instead, we must check the adoption data
7256 all_lvs = set([i["adopt"] for i in self.disks])
7257 if len(all_lvs) != len(self.disks):
7258 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7260 for lv_name in all_lvs:
7262 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7263 except errors.ReservationError:
7264 raise errors.OpPrereqError("LV named %s used by another instance" %
7265 lv_name, errors.ECODE_NOTUNIQUE)
7267 node_lvs = self.rpc.call_lv_list([pnode.name],
7268 self.cfg.GetVGName())[pnode.name]
7269 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7270 node_lvs = node_lvs.payload
7271      delta = all_lvs.difference(node_lvs.keys())
7272      if delta:
7273 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7274 utils.CommaJoin(delta),
7276      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7277      if online_lvs:
7278 raise errors.OpPrereqError("Online logical volumes found, cannot"
7279 " adopt: %s" % utils.CommaJoin(online_lvs),
7281 # update the size of disk based on what is found
7282 for dsk in self.disks:
7283 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7285 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7287 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7288 # check OS parameters (remotely)
7289 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7291 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7293 # memory check on primary node
7294    if self.op.start:
7295      _CheckNodeFreeMemory(self, self.pnode.name,
7296                           "creating instance %s" % self.op.instance_name,
7297                           self.be_full[constants.BE_MEMORY],
7298                           self.op.hypervisor)
7300 self.dry_run_result = list(nodenames)
7302 def Exec(self, feedback_fn):
7303 """Create and add the instance to the cluster.
7306 instance = self.op.instance_name
7307 pnode_name = self.pnode.name
7309 ht_kind = self.op.hypervisor
7310 if ht_kind in constants.HTS_REQ_PORT:
7311      network_port = self.cfg.AllocatePort()
7312    else:
7313      network_port = None
7315 if constants.ENABLE_FILE_STORAGE:
7316 # this is needed because os.path.join does not accept None arguments
7317 if self.op.file_storage_dir is None:
7318 string_file_storage_dir = ""
7319      else:
7320        string_file_storage_dir = self.op.file_storage_dir
7322 # build the full file storage dir path
7323 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7324 string_file_storage_dir, instance)
7325    else:
7326      file_storage_dir = ""
7328 disks = _GenerateDiskTemplate(self,
7329 self.op.disk_template,
7330 instance, pnode_name,
7331                                  self.secondaries,
7332                                  self.disks,
7333                                  file_storage_dir,
7334                                  self.op.file_driver,
7335                                  0)
7337 iobj = objects.Instance(name=instance, os=self.op.os_type,
7338 primary_node=pnode_name,
7339 nics=self.nics, disks=disks,
7340 disk_template=self.op.disk_template,
7342 network_port=network_port,
7343 beparams=self.op.beparams,
7344 hvparams=self.op.hvparams,
7345 hypervisor=self.op.hypervisor,
7346                            osparams=self.op.osparams,
7347                            )
7349 if self.adopt_disks:
7350 # rename LVs to the newly-generated names; we need to construct
7351 # 'fake' LV disks with the old data, plus the new unique_id
7352 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7353      rename_to = []
7354      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7355 rename_to.append(t_dsk.logical_id)
7356 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7357 self.cfg.SetDiskID(t_dsk, pnode_name)
7358 result = self.rpc.call_blockdev_rename(pnode_name,
7359 zip(tmp_disks, rename_to))
7360      result.Raise("Failed to rename adopted LVs")
7362    feedback_fn("* creating instance disks...")
7363    try:
7364      _CreateDisks(self, iobj)
7365    except errors.OpExecError:
7366      self.LogWarning("Device creation failed, reverting...")
7367      try:
7368        _RemoveDisks(self, iobj)
7369      finally:
7370        self.cfg.ReleaseDRBDMinors(instance)
7371        raise
7373 feedback_fn("adding instance %s to cluster config" % instance)
7375 self.cfg.AddInstance(iobj, self.proc.GetECId())
7377 # Declare that we don't want to remove the instance lock anymore, as we've
7378 # added the instance to the config
7379 del self.remove_locks[locking.LEVEL_INSTANCE]
7380 # Unlock all the nodes
7381 if self.op.mode == constants.INSTANCE_IMPORT:
7382 nodes_keep = [self.op.src_node]
7383 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7384 if node != self.op.src_node]
7385 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7386 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7388 self.context.glm.release(locking.LEVEL_NODE)
7389 del self.acquired_locks[locking.LEVEL_NODE]
7391 if self.op.wait_for_sync:
7392 disk_abort = not _WaitForSync(self, iobj)
7393 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7394 # make sure the disks are not degraded (still sync-ing is ok)
7396 feedback_fn("* checking mirrors status")
7397 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7398    else:
7399      disk_abort = False
7401    if disk_abort:
7402      _RemoveDisks(self, iobj)
7403 self.cfg.RemoveInstance(iobj.name)
7404 # Make sure the instance lock gets removed
7405 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7406      raise errors.OpExecError("There are some degraded disks for"
7407                               " this instance")
7409 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7410 if self.op.mode == constants.INSTANCE_CREATE:
7411 if not self.op.no_install:
7412 feedback_fn("* running the instance OS create scripts...")
7413 # FIXME: pass debug option from opcode to backend
7414 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7415 self.op.debug_level)
7416 result.Raise("Could not add os for instance %s"
7417 " on node %s" % (instance, pnode_name))
7419 elif self.op.mode == constants.INSTANCE_IMPORT:
7420        feedback_fn("* running the instance OS import scripts...")
7422        transfers = []
7424        for idx, image in enumerate(self.src_images):
7425          if not image:
7426            continue
7428          # FIXME: pass debug option from opcode to backend
7429 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7430 constants.IEIO_FILE, (image, ),
7431 constants.IEIO_SCRIPT,
7432                                             (iobj.disks[idx], idx),
7433                                             None)
7434          transfers.append(dt)
7436        import_result = \
7437          masterd.instance.TransferInstanceData(self, feedback_fn,
7438                                                self.op.src_node, pnode_name,
7439                                                self.pnode.secondary_ip,
7440                                                transfers)
7441 if not compat.all(import_result):
7442 self.LogWarning("Some disks for instance %s on node %s were not"
7443 " imported successfully" % (instance, pnode_name))
7445 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7446 feedback_fn("* preparing remote import...")
7447 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7448 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7450 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7451 self.source_x509_ca,
7452 self._cds, timeouts)
7453 if not compat.all(disk_results):
7454 # TODO: Should the instance still be started, even if some disks
7455 # failed to import (valid for local imports, too)?
7456 self.LogWarning("Some disks for instance %s on node %s were not"
7457 " imported successfully" % (instance, pnode_name))
7459 # Run rename script on newly imported instance
7460 assert iobj.name == instance
7461 feedback_fn("Running rename script for %s" % instance)
7462 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7463 self.source_instance_name,
7464 self.op.debug_level)
7465        if result.fail_msg:
7466          self.LogWarning("Failed to run rename script for %s on node"
7467 " %s: %s" % (instance, pnode_name, result.fail_msg))
7469      else:
7470        # also checked in the prereq part
7471        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7472                                     % self.op.mode)
7474    if self.op.start:
7475      iobj.admin_up = True
7476 self.cfg.Update(iobj, feedback_fn)
7477 logging.info("Starting instance %s on node %s", instance, pnode_name)
7478 feedback_fn("* starting instance...")
7479 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7480 result.Raise("Could not start instance")
7482 return list(iobj.all_nodes)
7485 class LUConnectConsole(NoHooksLU):
7486 """Connect to an instance's console.
7488 This is somewhat special in that it returns the command line that
7489 you need to run on the master node in order to connect to the
7498 def ExpandNames(self):
7499 self._ExpandAndLockInstance()
7501 def CheckPrereq(self):
7502 """Check prerequisites.
7504 This checks that the instance is in the cluster.
7507 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7508 assert self.instance is not None, \
7509 "Cannot retrieve locked instance %s" % self.op.instance_name
7510 _CheckNodeOnline(self, self.instance.primary_node)
7512 def Exec(self, feedback_fn):
7513 """Connect to the console of an instance
7516 instance = self.instance
7517 node = instance.primary_node
7519 node_insts = self.rpc.call_instance_list([node],
7520 [instance.hypervisor])[node]
7521 node_insts.Raise("Can't get node information from %s" % node)
7523 if instance.name not in node_insts.payload:
7524 raise errors.OpExecError("Instance %s is not running." % instance.name)
7526 logging.debug("Connecting to console of %s on %s", instance.name, node)
7528 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7529 cluster = self.cfg.GetClusterInfo()
7530 # beparams and hvparams are passed separately, to avoid editing the
7531 # instance and then saving the defaults in the instance itself.
7532 hvparams = cluster.FillHV(instance)
7533 beparams = cluster.FillBE(instance)
7534 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7537 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7540 class LUReplaceDisks(LogicalUnit):
7541 """Replace the disks of an instance.
7544 HPATH = "mirrors-replace"
7545 HTYPE = constants.HTYPE_INSTANCE
7548 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7549 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7550 ("remote_node", None, _TMaybeString),
7551 ("iallocator", None, _TMaybeString),
7552 ("early_release", False, _TBool),
7556 def CheckArguments(self):
7557 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7560 def ExpandNames(self):
7561 self._ExpandAndLockInstance()
7563 if self.op.iallocator is not None:
7564 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7566 elif self.op.remote_node is not None:
7567 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7568 self.op.remote_node = remote_node
7570 # Warning: do not remove the locking of the new secondary here
7571 # unless DRBD8.AddChildren is changed to work in parallel;
7572 # currently it doesn't since parallel invocations of
7573 # FindUnusedMinor will conflict
7574 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7575 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7578 self.needed_locks[locking.LEVEL_NODE] = []
7579 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7581 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7582 self.op.iallocator, self.op.remote_node,
7583 self.op.disks, False, self.op.early_release)
7585 self.tasklets = [self.replacer]
7587 def DeclareLocks(self, level):
7588 # If we're not already locking all nodes in the set we have to declare the
7589 # instance's primary/secondary nodes.
7590 if (level == locking.LEVEL_NODE and
7591 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7592 self._LockInstancesNodes()
7594 def BuildHooksEnv(self):
7597 This runs on the master, the primary and all the secondaries.
7600 instance = self.replacer.instance
7602 "MODE": self.op.mode,
7603 "NEW_SECONDARY": self.op.remote_node,
7604 "OLD_SECONDARY": instance.secondary_nodes[0],
7606 env.update(_BuildInstanceHookEnvByObject(self, instance))
7607    nl = [
7608      self.cfg.GetMasterNode(),
7609      instance.primary_node,
7610      ]
7611 if self.op.remote_node is not None:
7612      nl.append(self.op.remote_node)
7614    return env, nl, nl
7616 class TLReplaceDisks(Tasklet):
7617 """Replaces disks for an instance.
7619 Note: Locking is not within the scope of this class.
7622 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7623 disks, delay_iallocator, early_release):
7624 """Initializes this class.
7627 Tasklet.__init__(self, lu)
7630 self.instance_name = instance_name
7632 self.iallocator_name = iallocator_name
7633 self.remote_node = remote_node
7635 self.delay_iallocator = delay_iallocator
7636 self.early_release = early_release
7639 self.instance = None
7640 self.new_node = None
7641 self.target_node = None
7642 self.other_node = None
7643 self.remote_node_info = None
7644 self.node_secondary_ip = None
7647 def CheckArguments(mode, remote_node, iallocator):
7648 """Helper function for users of this class.
7651 # check for valid parameter combination
7652 if mode == constants.REPLACE_DISK_CHG:
7653 if remote_node is None and iallocator is None:
7654 raise errors.OpPrereqError("When changing the secondary either an"
7655 " iallocator script must be used or the"
7656 " new node given", errors.ECODE_INVAL)
7658 if remote_node is not None and iallocator is not None:
7659 raise errors.OpPrereqError("Give either the iallocator or the new"
7660 " secondary, not both", errors.ECODE_INVAL)
7662 elif remote_node is not None or iallocator is not None:
7663 # Not replacing the secondary
7664 raise errors.OpPrereqError("The iallocator and new node options can"
7665 " only be used when changing the"
7666 " secondary node", errors.ECODE_INVAL)
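# Illustrative combinations for the check above (node/iallocator names
# are examples):
#
#   CheckArguments(constants.REPLACE_DISK_CHG, "node3", None)   # ok
#   CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")    # ok
#   CheckArguments(constants.REPLACE_DISK_CHG, None, None)      # raises
#   CheckArguments(constants.REPLACE_DISK_PRI, "node3", None)   # raises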
7669 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7670 """Compute a new secondary node using an IAllocator.
7673 ial = IAllocator(lu.cfg, lu.rpc,
7674 mode=constants.IALLOCATOR_MODE_RELOC,
7676 relocate_from=relocate_from)
7678 ial.Run(iallocator_name)
7681 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7682 " %s" % (iallocator_name, ial.info),
7685 if len(ial.result) != ial.required_nodes:
7686 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7687 " of nodes (%s), required %s" %
7689 len(ial.result), ial.required_nodes),
7692 remote_node_name = ial.result[0]
7694 lu.LogInfo("Selected new secondary for instance '%s': %s",
7695 instance_name, remote_node_name)
7697 return remote_node_name
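# Rough shape of the relocation round-trip (hypothetical names): the
# request carries mode=reloc, the instance name and the node(s) to
# relocate from; on success ial.result is a one-element list such as
#
#   ["node3.example.com"]
#
# whose single entry becomes the new secondary.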
7699 def _FindFaultyDisks(self, node_name):
7700 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7703 def CheckPrereq(self):
7704 """Check prerequisites.
7706 This checks that the instance is in the cluster.
7709 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7710 assert instance is not None, \
7711 "Cannot retrieve locked instance %s" % self.instance_name
7713 if instance.disk_template != constants.DT_DRBD8:
7714 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7715 " instances", errors.ECODE_INVAL)
7717 if len(instance.secondary_nodes) != 1:
7718 raise errors.OpPrereqError("The instance has a strange layout,"
7719 " expected one secondary but found %d" %
7720 len(instance.secondary_nodes),
7723 if not self.delay_iallocator:
7724 self._CheckPrereq2()
7726 def _CheckPrereq2(self):
7727 """Check prerequisites, second part.
7729 This function should always be part of CheckPrereq. It was separated and is
7730 now called from Exec because during node evacuation iallocator was only
7731 called with an unmodified cluster model, not taking planned changes into account.
7735 instance = self.instance
7736 secondary_node = instance.secondary_nodes[0]
7738 if self.iallocator_name is None:
7739 remote_node = self.remote_node
7741 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7742 instance.name, instance.secondary_nodes)
7744 if remote_node is not None:
7745 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7746 assert self.remote_node_info is not None, \
7747 "Cannot retrieve locked node %s" % remote_node
7749 self.remote_node_info = None
7751 if remote_node == self.instance.primary_node:
7752 raise errors.OpPrereqError("The specified node is the primary node of"
7753 " the instance.", errors.ECODE_INVAL)
7755 if remote_node == secondary_node:
7756 raise errors.OpPrereqError("The specified node is already the"
7757 " secondary node of the instance.",
7760 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7761 constants.REPLACE_DISK_CHG):
7762 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7765 if self.mode == constants.REPLACE_DISK_AUTO:
7766 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7767 faulty_secondary = self._FindFaultyDisks(secondary_node)
7769 if faulty_primary and faulty_secondary:
7770 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7771 " one node and cannot be repaired"
7772 " automatically" % self.instance_name,
7776 self.disks = faulty_primary
7777 self.target_node = instance.primary_node
7778 self.other_node = secondary_node
7779 check_nodes = [self.target_node, self.other_node]
7780 elif faulty_secondary:
7781 self.disks = faulty_secondary
7782 self.target_node = secondary_node
7783 self.other_node = instance.primary_node
7784 check_nodes = [self.target_node, self.other_node]
7790 # Non-automatic modes
7791 if self.mode == constants.REPLACE_DISK_PRI:
7792 self.target_node = instance.primary_node
7793 self.other_node = secondary_node
7794 check_nodes = [self.target_node, self.other_node]
7796 elif self.mode == constants.REPLACE_DISK_SEC:
7797 self.target_node = secondary_node
7798 self.other_node = instance.primary_node
7799 check_nodes = [self.target_node, self.other_node]
7801 elif self.mode == constants.REPLACE_DISK_CHG:
7802 self.new_node = remote_node
7803 self.other_node = instance.primary_node
7804 self.target_node = secondary_node
7805 check_nodes = [self.new_node, self.other_node]
7807 _CheckNodeNotDrained(self.lu, remote_node)
7809 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7810 assert old_node_info is not None
7811 if old_node_info.offline and not self.early_release:
7812 # doesn't make sense to delay the release
7813 self.early_release = True
7814 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7815 " early-release mode", secondary_node)
7818 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7821 # If not specified all disks should be replaced
7823 self.disks = range(len(self.instance.disks))
7825 for node in check_nodes:
7826 _CheckNodeOnline(self.lu, node)
7828 # Check whether disks are valid
7829 for disk_idx in self.disks:
7830 instance.FindDisk(disk_idx)
7832 # Get secondary node IP addresses
7835 for node_name in [self.target_node, self.other_node, self.new_node]:
7836 if node_name is not None:
7837 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7839 self.node_secondary_ip = node_2nd_ip
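# The mapping built above is plain {node_name: secondary_ip}, e.g.
# (made-up addresses):
#
#   {"node1.example.com": "192.0.2.10", "node2.example.com": "192.0.2.11"}
#
# and is later passed to the drbd_disconnect_net/drbd_attach_net RPCs.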
7841 def Exec(self, feedback_fn):
7842 """Execute disk replacement.
7844 This dispatches the disk replacement to the appropriate handler.
7847 if self.delay_iallocator:
7848 self._CheckPrereq2()
7851 feedback_fn("No disks need replacement")
7854 feedback_fn("Replacing disk(s) %s for %s" %
7855 (utils.CommaJoin(self.disks), self.instance.name))
7857 activate_disks = (not self.instance.admin_up)
7859 # Activate the instance disks if we're replacing them on a down instance
7861 _StartInstanceDisks(self.lu, self.instance, True)
7864 # Should we replace the secondary node?
7865 if self.new_node is not None:
7866 fn = self._ExecDrbd8Secondary
7868 fn = self._ExecDrbd8DiskOnly
7870 return fn(feedback_fn)
7873 # Deactivate the instance disks if we're replacing them on a
7876 _SafeShutdownInstanceDisks(self.lu, self.instance)
7878 def _CheckVolumeGroup(self, nodes):
7879 self.lu.LogInfo("Checking volume groups")
7881 vgname = self.cfg.GetVGName()
7883 # Make sure volume group exists on all involved nodes
7884 results = self.rpc.call_vg_list(nodes)
7886 raise errors.OpExecError("Can't list volume groups on the nodes")
7890 res.Raise("Error checking node %s" % node)
7891 if vgname not in res.payload:
7892 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7895 def _CheckDisksExistence(self, nodes):
7896 # Check disk existence
7897 for idx, dev in enumerate(self.instance.disks):
7898 if idx not in self.disks:
7902 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7903 self.cfg.SetDiskID(dev, node)
7905 result = self.rpc.call_blockdev_find(node, dev)
7907 msg = result.fail_msg
7908 if msg or not result.payload:
7910 msg = "disk not found"
7911 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7914 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7915 for idx, dev in enumerate(self.instance.disks):
7916 if idx not in self.disks:
7919 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7922 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7924 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7925 " replace disks for instance %s" %
7926 (node_name, self.instance.name))
7928 def _CreateNewStorage(self, node_name):
7929 vgname = self.cfg.GetVGName()
7932 for idx, dev in enumerate(self.instance.disks):
7933 if idx not in self.disks:
7936 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7938 self.cfg.SetDiskID(dev, node_name)
7940 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7941 names = _GenerateUniqueNames(self.lu, lv_names)
7943 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7944 logical_id=(vgname, names[0]))
7945 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7946 logical_id=(vgname, names[1]))
7948 new_lvs = [lv_data, lv_meta]
7949 old_lvs = dev.children
7950 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7952 # we pass force_create=True to force the LVM creation
7953 for new_lv in new_lvs:
7954 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7955 _GetInstanceInfoText(self.instance), False)
7959 def _CheckDevices(self, node_name, iv_names):
7960 for name, (dev, _, _) in iv_names.iteritems():
7961 self.cfg.SetDiskID(dev, node_name)
7963 result = self.rpc.call_blockdev_find(node_name, dev)
7965 msg = result.fail_msg
7966 if msg or not result.payload:
7968 msg = "disk not found"
7969 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7972 if result.payload.is_degraded:
7973 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7975 def _RemoveOldStorage(self, node_name, iv_names):
7976 for name, (_, old_lvs, _) in iv_names.iteritems():
7977 self.lu.LogInfo("Remove logical volumes for %s" % name)
7980 self.cfg.SetDiskID(lv, node_name)
7982 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7984 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7985 hint="remove unused LVs manually")
7987 def _ReleaseNodeLock(self, node_name):
7988 """Releases the lock for a given node."""
7989 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7991 def _ExecDrbd8DiskOnly(self, feedback_fn):
7992 """Replace a disk on the primary or secondary for DRBD 8.
7994 The algorithm for replace is quite complicated:
7996 1. for each disk to be replaced:
7998 1. create new LVs on the target node with unique names
7999 1. detach old LVs from the drbd device
8000 1. rename old LVs to name_replaced.<time_t>
8001 1. rename new LVs to old LVs
8002 1. attach the new LVs (with the old names now) to the drbd device
8004 1. wait for sync across all devices
8006 1. for each modified disk:
8008 1. remove old LVs (which have the name name_replaced.<time_t>)
8010 Failures are not very well handled.
8015 # Step: check device activation
8016 self.lu.LogStep(1, steps_total, "Check device existence")
8017 self._CheckDisksExistence([self.other_node, self.target_node])
8018 self._CheckVolumeGroup([self.target_node, self.other_node])
8020 # Step: check other node consistency
8021 self.lu.LogStep(2, steps_total, "Check peer consistency")
8022 self._CheckDisksConsistency(self.other_node,
8023 self.other_node == self.instance.primary_node,
8026 # Step: create new storage
8027 self.lu.LogStep(3, steps_total, "Allocate new storage")
8028 iv_names = self._CreateNewStorage(self.target_node)
8030 # Step: for each lv, detach+rename*2+attach
8031 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8032 for dev, old_lvs, new_lvs in iv_names.itervalues():
8033 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8035 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8037 result.Raise("Can't detach drbd from local storage on node"
8038 " %s for device %s" % (self.target_node, dev.iv_name))
8040 #cfg.Update(instance)
8042 # ok, we created the new LVs, so now we know we have the needed
8043 # storage; as such, we proceed on the target node to rename
8044 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8045 # using the assumption that logical_id == physical_id (which in
8046 # turn is the unique_id on that node)
8048 # FIXME(iustin): use a better name for the replaced LVs
8049 temp_suffix = int(time.time())
8050 ren_fn = lambda d, suff: (d.physical_id[0],
8051 d.physical_id[1] + "_replaced-%s" % suff)
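# ren_fn rewrites only the LV name inside the physical id, e.g. for a
# device whose physical_id is ("xenvg", "disk0_data") and suffix
# 1300000000 (illustrative values), it returns:
#
#   ("xenvg", "disk0_data_replaced-1300000000")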
8053 # Build the rename list based on what LVs exist on the node
8054 rename_old_to_new = []
8055 for to_ren in old_lvs:
8056 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8057 if not result.fail_msg and result.payload:
8059 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8061 self.lu.LogInfo("Renaming the old LVs on the target node")
8062 result = self.rpc.call_blockdev_rename(self.target_node,
8064 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8066 # Now we rename the new LVs to the old LVs
8067 self.lu.LogInfo("Renaming the new LVs on the target node")
8068 rename_new_to_old = [(new, old.physical_id)
8069 for old, new in zip(old_lvs, new_lvs)]
8070 result = self.rpc.call_blockdev_rename(self.target_node,
8072 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8074 for old, new in zip(old_lvs, new_lvs):
8075 new.logical_id = old.logical_id
8076 self.cfg.SetDiskID(new, self.target_node)
8078 for disk in old_lvs:
8079 disk.logical_id = ren_fn(disk, temp_suffix)
8080 self.cfg.SetDiskID(disk, self.target_node)
8082 # Now that the new lvs have the old name, we can add them to the device
8083 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8084 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8086 msg = result.fail_msg
8088 for new_lv in new_lvs:
8089 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8092 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8093 hint=("cleanup manually the unused logical"
8095 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8097 dev.children = new_lvs
8099 self.cfg.Update(self.instance, feedback_fn)
8102 if self.early_release:
8103 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8105 self._RemoveOldStorage(self.target_node, iv_names)
8106 # WARNING: we release both node locks here, do not do other RPCs
8107 # than WaitForSync to the primary node
8108 self._ReleaseNodeLock([self.target_node, self.other_node])
8111 # This can fail as the old devices are degraded and _WaitForSync
8112 # does a combined result over all disks, so we don't check its return value
8113 self.lu.LogStep(cstep, steps_total, "Sync devices")
8115 _WaitForSync(self.lu, self.instance)
8117 # Check all devices manually
8118 self._CheckDevices(self.instance.primary_node, iv_names)
8120 # Step: remove old storage
8121 if not self.early_release:
8122 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8124 self._RemoveOldStorage(self.target_node, iv_names)
8126 def _ExecDrbd8Secondary(self, feedback_fn):
8127 """Replace the secondary node for DRBD 8.
8129 The algorithm for replace is quite complicated:
8130 - for all disks of the instance:
8131 - create new LVs on the new node with same names
8132 - shutdown the drbd device on the old secondary
8133 - disconnect the drbd network on the primary
8134 - create the drbd device on the new secondary
8135 - network attach the drbd on the primary, using an artifice:
8136 the drbd code for Attach() will connect to the network if it
8137 finds a device which is connected to the good local disks but not network enabled
8139 - wait for sync across all devices
8140 - remove all disks from the old secondary
8142 Failures are not very well handled.
8147 # Step: check device activation
8148 self.lu.LogStep(1, steps_total, "Check device existence")
8149 self._CheckDisksExistence([self.instance.primary_node])
8150 self._CheckVolumeGroup([self.instance.primary_node])
8152 # Step: check other node consistency
8153 self.lu.LogStep(2, steps_total, "Check peer consistency")
8154 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8156 # Step: create new storage
8157 self.lu.LogStep(3, steps_total, "Allocate new storage")
8158 for idx, dev in enumerate(self.instance.disks):
8159 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8160 (self.new_node, idx))
8161 # we pass force_create=True to force LVM creation
8162 for new_lv in dev.children:
8163 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8164 _GetInstanceInfoText(self.instance), False)
8166 # Step 4: drbd minors and drbd setup changes
8167 # after this, we must manually remove the drbd minors on both the
8168 # error and the success paths
8169 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8170 minors = self.cfg.AllocateDRBDMinor([self.new_node
8171 for dev in self.instance.disks],
8173 logging.debug("Allocated minors %r", minors)
8176 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8177 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
8178 (self.new_node, idx))
8179 # create new devices on new_node; note that we create two IDs:
8180 # one without port, so the drbd will be activated without
8181 # networking information on the new node at this stage, and one
8182 # with network, for the latter activation in step 4
8183 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8184 if self.instance.primary_node == o_node1:
8187 assert self.instance.primary_node == o_node2, "Three-node instance?"
8190 new_alone_id = (self.instance.primary_node, self.new_node, None,
8191 p_minor, new_minor, o_secret)
8192 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8193 p_minor, new_minor, o_secret)
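# A DRBD8 logical_id is the 6-tuple (node_a, node_b, port, minor_a,
# minor_b, secret); with hypothetical values:
#
#   ("node1.example.com", "node3.example.com", 11000, 0, 3, "s3cr3t")
#
# new_alone_id uses port=None so the device comes up without
# networking; new_net_id is the same tuple with the real port.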
8195 iv_names[idx] = (dev, dev.children, new_net_id)
8196 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8198 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8199 logical_id=new_alone_id,
8200 children=dev.children,
8203 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8204 _GetInstanceInfoText(self.instance), False)
8205 except errors.GenericError:
8206 self.cfg.ReleaseDRBDMinors(self.instance.name)
8209 # We have new devices, shutdown the drbd on the old secondary
8210 for idx, dev in enumerate(self.instance.disks):
8211 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8212 self.cfg.SetDiskID(dev, self.target_node)
8213 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8215 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8216 " node: %s" % (idx, msg),
8217 hint=("Please cleanup this device manually as"
8218 " soon as possible"))
8220 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8221 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8222 self.node_secondary_ip,
8223 self.instance.disks)\
8224 [self.instance.primary_node]
8226 msg = result.fail_msg
8228 # detaches didn't succeed (unlikely)
8229 self.cfg.ReleaseDRBDMinors(self.instance.name)
8230 raise errors.OpExecError("Can't detach the disks from the network on"
8231 " old node: %s" % (msg,))
8233 # if we managed to detach at least one, we update all the disks of
8234 # the instance to point to the new secondary
8235 self.lu.LogInfo("Updating instance configuration")
8236 for dev, _, new_logical_id in iv_names.itervalues():
8237 dev.logical_id = new_logical_id
8238 self.cfg.SetDiskID(dev, self.instance.primary_node)
8240 self.cfg.Update(self.instance, feedback_fn)
8242 # and now perform the drbd attach
8243 self.lu.LogInfo("Attaching primary drbds to new secondary"
8244 " (standalone => connected)")
8245 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8247 self.node_secondary_ip,
8248 self.instance.disks,
8251 for to_node, to_result in result.items():
8252 msg = to_result.fail_msg
8254 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8256 hint=("please do a gnt-instance info to see the"
8257 " status of disks"))
8259 if self.early_release:
8260 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8262 self._RemoveOldStorage(self.target_node, iv_names)
8263 # WARNING: we release all node locks here, do not do other RPCs
8264 # than WaitForSync to the primary node
8265 self._ReleaseNodeLock([self.instance.primary_node,
8270 # This can fail as the old devices are degraded and _WaitForSync
8271 # does a combined result over all disks, so we don't check its return value
8272 self.lu.LogStep(cstep, steps_total, "Sync devices")
8274 _WaitForSync(self.lu, self.instance)
8276 # Check all devices manually
8277 self._CheckDevices(self.instance.primary_node, iv_names)
8279 # Step: remove old storage
8280 if not self.early_release:
8281 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8282 self._RemoveOldStorage(self.target_node, iv_names)
8285 class LURepairNodeStorage(NoHooksLU):
8286 """Repairs the volume group on a node.
8291 ("storage_type", _NoDefault, _CheckStorageType),
8292 ("name", _NoDefault, _TNonEmptyString),
8293 ("ignore_consistency", False, _TBool),
8297 def CheckArguments(self):
8298 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8300 storage_type = self.op.storage_type
8302 if (constants.SO_FIX_CONSISTENCY not in
8303 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8304 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
8305 " repaired" % storage_type,
8308 def ExpandNames(self):
8309 self.needed_locks = {
8310 locking.LEVEL_NODE: [self.op.node_name],
8313 def _CheckFaultyDisks(self, instance, node_name):
8314 """Ensure faulty disks abort the opcode or at least warn."""
8316 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8318 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8319 " node '%s'" % (instance.name, node_name),
8321 except errors.OpPrereqError, err:
8322 if self.op.ignore_consistency:
8323 self.proc.LogWarning(str(err.args[0]))
8327 def CheckPrereq(self):
8328 """Check prerequisites.
8331 # Check whether any instance on this node has faulty disks
8332 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8333 if not inst.admin_up:
8335 check_nodes = set(inst.all_nodes)
8336 check_nodes.discard(self.op.node_name)
8337 for inst_node_name in check_nodes:
8338 self._CheckFaultyDisks(inst, inst_node_name)
8340 def Exec(self, feedback_fn):
8341 feedback_fn("Repairing storage unit '%s' on %s ..." %
8342 (self.op.name, self.op.node_name))
8344 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8345 result = self.rpc.call_storage_execute(self.op.node_name,
8346 self.op.storage_type, st_args,
8348 constants.SO_FIX_CONSISTENCY)
8349 result.Raise("Failed to repair storage unit '%s' on %s" %
8350 (self.op.name, self.op.node_name))
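# A typical front-end invocation for this LU looks like (node and
# volume group names are examples):
#
#   gnt-node repair-storage node1.example.com lvm-vg xenvg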
8353 class LUNodeEvacuationStrategy(NoHooksLU):
8354 """Computes the node evacuation strategy.
8358 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8359 ("remote_node", None, _TMaybeString),
8360 ("iallocator", None, _TMaybeString),
8364 def CheckArguments(self):
8365 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8367 def ExpandNames(self):
8368 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8369 self.needed_locks = locks = {}
8370 if self.op.remote_node is None:
8371 locks[locking.LEVEL_NODE] = locking.ALL_SET
8373 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8374 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8376 def Exec(self, feedback_fn):
8377 if self.op.remote_node is not None:
8379 for node in self.op.nodes:
8380 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8383 if i.primary_node == self.op.remote_node:
8384 raise errors.OpPrereqError("Node %s is the primary node of"
8385 " instance %s, cannot use it as"
8387 (self.op.remote_node, i.name),
8389 result.append([i.name, self.op.remote_node])
8391 ial = IAllocator(self.cfg, self.rpc,
8392 mode=constants.IALLOCATOR_MODE_MEVAC,
8393 evac_nodes=self.op.nodes)
8394 ial.Run(self.op.iallocator, validate=True)
8396 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
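# On success the MEVAC answer is a list of [instance, new_node] pairs,
# e.g. (hypothetical names):
#
#   [["instance1.example.com", "node4.example.com"],
#    ["instance2.example.com", "node4.example.com"]]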
8402 class LUGrowDisk(LogicalUnit):
8403 """Grow a disk of an instance.
8407 HTYPE = constants.HTYPE_INSTANCE
8410 ("disk", _NoDefault, _TInt),
8411 ("amount", _NoDefault, _TInt),
8412 ("wait_for_sync", True, _TBool),
8416 def ExpandNames(self):
8417 self._ExpandAndLockInstance()
8418 self.needed_locks[locking.LEVEL_NODE] = []
8419 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8421 def DeclareLocks(self, level):
8422 if level == locking.LEVEL_NODE:
8423 self._LockInstancesNodes()
8425 def BuildHooksEnv(self):
8428 This runs on the master, the primary and all the secondaries.
8432 "DISK": self.op.disk,
8433 "AMOUNT": self.op.amount,
8435 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8436 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8439 def CheckPrereq(self):
8440 """Check prerequisites.
8442 This checks that the instance is in the cluster.
8445 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8446 assert instance is not None, \
8447 "Cannot retrieve locked instance %s" % self.op.instance_name
8448 nodenames = list(instance.all_nodes)
8449 for node in nodenames:
8450 _CheckNodeOnline(self, node)
8452 self.instance = instance
8454 if instance.disk_template not in constants.DTS_GROWABLE:
8455 raise errors.OpPrereqError("Instance's disk layout does not support"
8456 " growing.", errors.ECODE_INVAL)
8458 self.disk = instance.FindDisk(self.op.disk)
8460 if instance.disk_template != constants.DT_FILE:
8461 # TODO: check the free disk space for file, when that feature will be
8463 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8465 def Exec(self, feedback_fn):
8466 """Execute disk grow.
8469 instance = self.instance
8472 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8474 raise errors.OpExecError("Cannot activate block device to grow")
8476 for node in instance.all_nodes:
8477 self.cfg.SetDiskID(disk, node)
8478 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8479 result.Raise("Grow request failed to node %s" % node)
8481 # TODO: Rewrite code to work properly
8482 # DRBD goes into sync mode for a short amount of time after executing the
8483 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8484 # calling "resize" in sync mode fails. Sleeping for a short amount of
8485 # time is a work-around.
8488 disk.RecordGrow(self.op.amount)
8489 self.cfg.Update(instance, feedback_fn)
8490 if self.op.wait_for_sync:
8491 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8493 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8494 " status.\nPlease check the instance.")
8495 if not instance.admin_up:
8496 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8497 elif not instance.admin_up:
8498 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8499 " not supposed to be running because no wait for"
8500 " sync mode was requested.")
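# Typical front-end for this LU (instance name and size are examples):
#
#   gnt-instance grow-disk instance1.example.com 0 2G
#
# grows disk 0 by 2 GiB; the wait_for_sync parameter decides whether
# the sync step above is awaited.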
8503 class LUQueryInstanceData(NoHooksLU):
8504 """Query runtime instance data.
8508 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8509 ("static", False, _TBool),
8513 def ExpandNames(self):
8514 self.needed_locks = {}
8515 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8517 if self.op.instances:
8518 self.wanted_names = []
8519 for name in self.op.instances:
8520 full_name = _ExpandInstanceName(self.cfg, name)
8521 self.wanted_names.append(full_name)
8522 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8524 self.wanted_names = None
8525 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8527 self.needed_locks[locking.LEVEL_NODE] = []
8528 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8530 def DeclareLocks(self, level):
8531 if level == locking.LEVEL_NODE:
8532 self._LockInstancesNodes()
8534 def CheckPrereq(self):
8535 """Check prerequisites.
8537 This only checks the optional instance list against the existing names.
8540 if self.wanted_names is None:
8541 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8543 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8544 in self.wanted_names]
8546 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8547 """Returns the status of a block device
8550 if self.op.static or not node:
8553 self.cfg.SetDiskID(dev, node)
8555 result = self.rpc.call_blockdev_find(node, dev)
8559 result.Raise("Can't compute disk status for %s" % instance_name)
8561 status = result.payload
8565 return (status.dev_path, status.major, status.minor,
8566 status.sync_percent, status.estimated_time,
8567 status.is_degraded, status.ldisk_status)
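# The returned tuple mirrors the fields of the RPC payload, roughly
# (hypothetical values):
#
#   ("/dev/drbd0", 147, 0, 80.5, 35, False, <ldisk status>)
#
# i.e. dev_path, major, minor, sync_percent, estimated_time,
# is_degraded and ldisk_status.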
8569 def _ComputeDiskStatus(self, instance, snode, dev):
8570 """Compute block device status.
8573 if dev.dev_type in constants.LDS_DRBD:
8574 # we change the snode then (otherwise we use the one passed in)
8575 if dev.logical_id[0] == instance.primary_node:
8576 snode = dev.logical_id[1]
8578 snode = dev.logical_id[0]
8580 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8582 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8585 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8586 for child in dev.children]
8591 "iv_name": dev.iv_name,
8592 "dev_type": dev.dev_type,
8593 "logical_id": dev.logical_id,
8594 "physical_id": dev.physical_id,
8595 "pstatus": dev_pstatus,
8596 "sstatus": dev_sstatus,
8597 "children": dev_children,
8604 def Exec(self, feedback_fn):
8605 """Gather and return data"""
8608 cluster = self.cfg.GetClusterInfo()
8610 for instance in self.wanted_instances:
8611 if not self.op.static:
8612 remote_info = self.rpc.call_instance_info(instance.primary_node,
8614 instance.hypervisor)
8615 remote_info.Raise("Error checking node %s" % instance.primary_node)
8616 remote_info = remote_info.payload
8617 if remote_info and "state" in remote_info:
8620 remote_state = "down"
8623 if instance.admin_up:
8626 config_state = "down"
8628 disks = [self._ComputeDiskStatus(instance, None, device)
8629 for device in instance.disks]
8632 "name": instance.name,
8633 "config_state": config_state,
8634 "run_state": remote_state,
8635 "pnode": instance.primary_node,
8636 "snodes": instance.secondary_nodes,
8638 # this happens to be the same format used for hooks
8639 "nics": _NICListToTuple(self, instance.nics),
8640 "disk_template": instance.disk_template,
8642 "hypervisor": instance.hypervisor,
8643 "network_port": instance.network_port,
8644 "hv_instance": instance.hvparams,
8645 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8646 "be_instance": instance.beparams,
8647 "be_actual": cluster.FillBE(instance),
8648 "os_instance": instance.osparams,
8649 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8650 "serial_no": instance.serial_no,
8651 "mtime": instance.mtime,
8652 "ctime": instance.ctime,
8653 "uuid": instance.uuid,
8656 result[instance.name] = idict
8661 class LUSetInstanceParams(LogicalUnit):
8662 """Modifies an instance's parameters.
8665 HPATH = "instance-modify"
8666 HTYPE = constants.HTYPE_INSTANCE
8669 ("nics", _EmptyList, _TList),
8670 ("disks", _EmptyList, _TList),
8671 ("beparams", _EmptyDict, _TDict),
8672 ("hvparams", _EmptyDict, _TDict),
8673 ("disk_template", None, _TMaybeString),
8674 ("remote_node", None, _TMaybeString),
8675 ("os_name", None, _TMaybeString),
8676 ("force_variant", False, _TBool),
8677 ("osparams", None, _TOr(_TDict, _TNone)),
8682 def CheckArguments(self):
8683 if not (self.op.nics or self.op.disks or self.op.disk_template or
8684 self.op.hvparams or self.op.beparams or self.op.os_name):
8685 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8687 if self.op.hvparams:
8688 _CheckGlobalHvParams(self.op.hvparams)
8692 for disk_op, disk_dict in self.op.disks:
8693 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8694 if disk_op == constants.DDM_REMOVE:
8697 elif disk_op == constants.DDM_ADD:
8700 if not isinstance(disk_op, int):
8701 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8702 if not isinstance(disk_dict, dict):
8703 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8704 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8706 if disk_op == constants.DDM_ADD:
8707 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8708 if mode not in constants.DISK_ACCESS_SET:
8709 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8711 size = disk_dict.get('size', None)
8713 raise errors.OpPrereqError("Required disk parameter size missing",
8717 except (TypeError, ValueError), err:
8718 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8719 str(err), errors.ECODE_INVAL)
8720 disk_dict['size'] = size
8722 # modification of disk
8723 if 'size' in disk_dict:
8724 raise errors.OpPrereqError("Disk size change not possible, use"
8725 " grow-disk", errors.ECODE_INVAL)
8727 if disk_addremove > 1:
8728 raise errors.OpPrereqError("Only one disk add or remove operation"
8729 " supported at a time", errors.ECODE_INVAL)
8731 if self.op.disks and self.op.disk_template is not None:
8732 raise errors.OpPrereqError("Disk template conversion and other disk"
8733 " changes not supported at the same time",
8736 if self.op.disk_template:
8737 _CheckDiskTemplate(self.op.disk_template)
8738 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8739 self.op.remote_node is None):
8740 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8741 " one requires specifying a secondary node",
8746 for nic_op, nic_dict in self.op.nics:
8747 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8748 if nic_op == constants.DDM_REMOVE:
8751 elif nic_op == constants.DDM_ADD:
8754 if not isinstance(nic_op, int):
8755 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8756 if not isinstance(nic_dict, dict):
8757 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8758 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8760 # nic_dict should be a dict
8761 nic_ip = nic_dict.get('ip', None)
8762 if nic_ip is not None:
8763 if nic_ip.lower() == constants.VALUE_NONE:
8764 nic_dict['ip'] = None
8766 if not netutils.IPAddress.IsValid(nic_ip):
8767 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8770 nic_bridge = nic_dict.get('bridge', None)
8771 nic_link = nic_dict.get('link', None)
8772 if nic_bridge and nic_link:
8773 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8774 " at the same time", errors.ECODE_INVAL)
8775 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8776 nic_dict['bridge'] = None
8777 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8778 nic_dict['link'] = None
8780 if nic_op == constants.DDM_ADD:
8781 nic_mac = nic_dict.get('mac', None)
8783 nic_dict['mac'] = constants.VALUE_AUTO
8785 if 'mac' in nic_dict:
8786 nic_mac = nic_dict['mac']
8787 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8788 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8790 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8791 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8792 " modifying an existing nic",
8795 if nic_addremove > 1:
8796 raise errors.OpPrereqError("Only one NIC add or remove operation"
8797 " supported at a time", errors.ECODE_INVAL)
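# Both self.op.nics and self.op.disks are lists of (op, params) pairs,
# where op is constants.DDM_ADD, constants.DDM_REMOVE or the index of
# an existing device. Hypothetical example:
#
#   nics = [(constants.DDM_ADD, {"mac": constants.VALUE_AUTO}),
#           (0, {"link": "br1"})]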
8799 def ExpandNames(self):
8800 self._ExpandAndLockInstance()
8801 self.needed_locks[locking.LEVEL_NODE] = []
8802 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8804 def DeclareLocks(self, level):
8805 if level == locking.LEVEL_NODE:
8806 self._LockInstancesNodes()
8807 if self.op.disk_template and self.op.remote_node:
8808 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8809 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8811 def BuildHooksEnv(self):
8814 This runs on the master, primary and secondaries.
8818 if constants.BE_MEMORY in self.be_new:
8819 args['memory'] = self.be_new[constants.BE_MEMORY]
8820 if constants.BE_VCPUS in self.be_new:
8821 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8822 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8823 # information at all.
8826 nic_override = dict(self.op.nics)
8827 for idx, nic in enumerate(self.instance.nics):
8828 if idx in nic_override:
8829 this_nic_override = nic_override[idx]
8831 this_nic_override = {}
8832 if 'ip' in this_nic_override:
8833 ip = this_nic_override['ip']
8836 if 'mac' in this_nic_override:
8837 mac = this_nic_override['mac']
8840 if idx in self.nic_pnew:
8841 nicparams = self.nic_pnew[idx]
8843 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8844 mode = nicparams[constants.NIC_MODE]
8845 link = nicparams[constants.NIC_LINK]
8846 args['nics'].append((ip, mac, mode, link))
8847 if constants.DDM_ADD in nic_override:
8848 ip = nic_override[constants.DDM_ADD].get('ip', None)
8849 mac = nic_override[constants.DDM_ADD]['mac']
8850 nicparams = self.nic_pnew[constants.DDM_ADD]
8851 mode = nicparams[constants.NIC_MODE]
8852 link = nicparams[constants.NIC_LINK]
8853 args['nics'].append((ip, mac, mode, link))
8854 elif constants.DDM_REMOVE in nic_override:
8855 del args['nics'][-1]
8857 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8858 if self.op.disk_template:
8859 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8860 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8863 def CheckPrereq(self):
8864 """Check prerequisites.
8866 This only checks the instance list against the existing names.
8869 # checking the new params on the primary/secondary nodes
8871 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8872 cluster = self.cluster = self.cfg.GetClusterInfo()
8873 assert self.instance is not None, \
8874 "Cannot retrieve locked instance %s" % self.op.instance_name
8875 pnode = instance.primary_node
8876 nodelist = list(instance.all_nodes)
8879 if self.op.os_name and not self.op.force:
8880 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8881 self.op.force_variant)
8882 instance_os = self.op.os_name
8884 instance_os = instance.os
8886 if self.op.disk_template:
8887 if instance.disk_template == self.op.disk_template:
8888 raise errors.OpPrereqError("Instance already has disk template %s" %
8889 instance.disk_template, errors.ECODE_INVAL)
8891 if (instance.disk_template,
8892 self.op.disk_template) not in self._DISK_CONVERSIONS:
8893 raise errors.OpPrereqError("Unsupported disk template conversion from"
8894 " %s to %s" % (instance.disk_template,
8895 self.op.disk_template),
8897 _CheckInstanceDown(self, instance, "cannot change disk template")
8898 if self.op.disk_template in constants.DTS_NET_MIRROR:
8899 if self.op.remote_node == pnode:
8900 raise errors.OpPrereqError("Given new secondary node %s is the same"
8901 " as the primary node of the instance" %
8902 self.op.remote_node, errors.ECODE_STATE)
8903 _CheckNodeOnline(self, self.op.remote_node)
8904 _CheckNodeNotDrained(self, self.op.remote_node)
8905 disks = [{"size": d.size} for d in instance.disks]
8906 required = _ComputeDiskSize(self.op.disk_template, disks)
8907 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8909 # hvparams processing
8910 if self.op.hvparams:
8911 hv_type = instance.hypervisor
8912 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8913 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8914 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8917 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8918 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8919 self.hv_new = hv_new # the new actual values
8920 self.hv_inst = i_hvdict # the new dict (without defaults)
8922 self.hv_new = self.hv_inst = {}
8924 # beparams processing
8925 if self.op.beparams:
8926 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8928 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8929 be_new = cluster.SimpleFillBE(i_bedict)
8930 self.be_new = be_new # the new actual values
8931 self.be_inst = i_bedict # the new dict (without defaults)
8933 self.be_new = self.be_inst = {}
8935 # osparams processing
8936 if self.op.osparams:
8937 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8938 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8939 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8940 self.os_inst = i_osdict # the new dict (without defaults)
8942 self.os_new = self.os_inst = {}
8946 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8947 mem_check_list = [pnode]
8948 if be_new[constants.BE_AUTO_BALANCE]:
8949 # either we changed auto_balance to yes or it was from before
8950 mem_check_list.extend(instance.secondary_nodes)
8951 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8952 instance.hypervisor)
8953 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8954 instance.hypervisor)
8955 pninfo = nodeinfo[pnode]
8956 msg = pninfo.fail_msg
8958 # Assume the primary node is unreachable and go ahead
8959 self.warn.append("Can't get info from primary node %s: %s" %
8961 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8962 self.warn.append("Node data from primary node %s doesn't contain"
8963 " free memory information" % pnode)
8964 elif instance_info.fail_msg:
8965 self.warn.append("Can't get instance runtime information: %s" %
8966 instance_info.fail_msg)
8968 if instance_info.payload:
8969 current_mem = int(instance_info.payload['memory'])
8971 # Assume instance not running
8972 # (there is a slight race condition here, but it's not very probable,
8973 # and we have no other way to check)
8975 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8976 pninfo.payload['memory_free'])
8978 raise errors.OpPrereqError("This change will prevent the instance"
8979 " from starting, due to %d MB of memory"
8980 " missing on its primary node" % miss_mem,
8983 if be_new[constants.BE_AUTO_BALANCE]:
8984 for node, nres in nodeinfo.items():
8985 if node not in instance.secondary_nodes:
8989 self.warn.append("Can't get info from secondary node %s: %s" %
8991 elif not isinstance(nres.payload.get('memory_free', None), int):
8992 self.warn.append("Secondary node %s didn't return free"
8993 " memory information" % node)
8994 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8995 self.warn.append("Not enough memory to failover instance to"
8996 " secondary node %s" % node)
9001 for nic_op, nic_dict in self.op.nics:
9002 if nic_op == constants.DDM_REMOVE:
9003 if not instance.nics:
9004 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9007 if nic_op != constants.DDM_ADD:
9009 if not instance.nics:
9010 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9011 " no NICs" % nic_op,
9013 if nic_op < 0 or nic_op >= len(instance.nics):
9014 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9016 (nic_op, len(instance.nics) - 1),
9018 old_nic_params = instance.nics[nic_op].nicparams
9019 old_nic_ip = instance.nics[nic_op].ip
9024 update_params_dict = dict([(key, nic_dict[key])
9025 for key in constants.NICS_PARAMETERS
9026 if key in nic_dict])
9028 if 'bridge' in nic_dict:
9029 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9031 new_nic_params = _GetUpdatedParams(old_nic_params,
9033 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9034 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9035 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9036 self.nic_pinst[nic_op] = new_nic_params
9037 self.nic_pnew[nic_op] = new_filled_nic_params
9038 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9040 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9041 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9042 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9044 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9046 self.warn.append(msg)
9048 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9049 if new_nic_mode == constants.NIC_MODE_ROUTED:
9050 if 'ip' in nic_dict:
9051 nic_ip = nic_dict['ip']
9055 raise errors.OpPrereqError('Cannot set the nic ip to None'
9056 ' on a routed nic', errors.ECODE_INVAL)
9057 if 'mac' in nic_dict:
9058 nic_mac = nic_dict['mac']
9060 raise errors.OpPrereqError('Cannot set the nic mac to None',
9062 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9063 # otherwise generate the mac
9064 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9066 # or validate/reserve the current one
9068 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9069 except errors.ReservationError:
9070 raise errors.OpPrereqError("MAC address %s already in use"
9071 " in cluster" % nic_mac,
9072 errors.ECODE_NOTUNIQUE)
9075 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9076 raise errors.OpPrereqError("Disk operations not supported for"
9077 " diskless instances",
9079 for disk_op, _ in self.op.disks:
9080 if disk_op == constants.DDM_REMOVE:
9081 if len(instance.disks) == 1:
9082 raise errors.OpPrereqError("Cannot remove the last disk of"
9083 " an instance", errors.ECODE_INVAL)
9084 _CheckInstanceDown(self, instance, "cannot remove disks")
9086 if (disk_op == constants.DDM_ADD and
9087 len(instance.disks) >= constants.MAX_DISKS):
9088 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9089 " add more" % constants.MAX_DISKS,
9091 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9093 if disk_op < 0 or disk_op >= len(instance.disks):
9094 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9096 (disk_op, len(instance.disks)),
9101 def _ConvertPlainToDrbd(self, feedback_fn):
9102 """Converts an instance from plain to drbd.
9105 feedback_fn("Converting template to drbd")
9106 instance = self.instance
9107 pnode = instance.primary_node
9108 snode = self.op.remote_node
9110 # create a fake disk info for _GenerateDiskTemplate
9111 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9112 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9113 instance.name, pnode, [snode],
9114 disk_info, None, None, 0)
9115 info = _GetInstanceInfoText(instance)
9116 feedback_fn("Creating additional volumes...")
9117 # first, create the missing data and meta devices
9118 for disk in new_disks:
9119 # unfortunately this is... not too nice
9120 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9122 for child in disk.children:
9123 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9124 # at this stage, all new LVs have been created, we can rename the
9126 feedback_fn("Renaming original volumes...")
9127 rename_list = [(o, n.children[0].logical_id)
9128 for (o, n) in zip(instance.disks, new_disks)]
9129 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9130 result.Raise("Failed to rename original LVs")
9132 feedback_fn("Initializing DRBD devices...")
9133 # all child devices are in place, we can now create the DRBD devices
9134 for disk in new_disks:
9135 for node in [pnode, snode]:
9136 f_create = node == pnode
9137 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9139 # at this point, the instance has been modified
9140 instance.disk_template = constants.DT_DRBD8
9141 instance.disks = new_disks
9142 self.cfg.Update(instance, feedback_fn)
9144 # disks are created, waiting for sync
9145 disk_abort = not _WaitForSync(self, instance)
9147 raise errors.OpExecError("There are some degraded disks for"
9148 " this instance, please cleanup manually")
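# These conversions are normally reached via instance modification,
# e.g. (names are examples):
#
#   gnt-instance modify -t drbd -n node2.example.com instance1.example.com
#
# for plain->drbd; the reverse uses -t plain and needs no secondary.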
9150 def _ConvertDrbdToPlain(self, feedback_fn):
9151 """Converts an instance from drbd to plain.
9154 instance = self.instance
9155 assert len(instance.secondary_nodes) == 1
9156 pnode = instance.primary_node
9157 snode = instance.secondary_nodes[0]
9158 feedback_fn("Converting template to plain")
9160 old_disks = instance.disks
9161 new_disks = [d.children[0] for d in old_disks]
9163 # copy over size and mode
9164 for parent, child in zip(old_disks, new_disks):
9165 child.size = parent.size
9166 child.mode = parent.mode
9168 # update instance structure
9169 instance.disks = new_disks
9170 instance.disk_template = constants.DT_PLAIN
9171 self.cfg.Update(instance, feedback_fn)
9173 feedback_fn("Removing volumes on the secondary node...")
9174 for disk in old_disks:
9175 self.cfg.SetDiskID(disk, snode)
9176 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9178 self.LogWarning("Could not remove block device %s on node %s,"
9179 " continuing anyway: %s", disk.iv_name, snode, msg)
9181 feedback_fn("Removing unneeded volumes on the primary node...")
9182 for idx, disk in enumerate(old_disks):
9183 meta = disk.children[1]
9184 self.cfg.SetDiskID(meta, pnode)
9185 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9187 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9188 " continuing anyway: %s", idx, pnode, msg)
9191 def Exec(self, feedback_fn):
9192 """Modifies an instance.
9194 All parameters take effect only at the next restart of the instance.
9197 # Process here the warnings from CheckPrereq, as we don't have a
9198 # feedback_fn there.
9199 for warn in self.warn:
9200 feedback_fn("WARNING: %s" % warn)
9203 instance = self.instance
9205 for disk_op, disk_dict in self.op.disks:
9206 if disk_op == constants.DDM_REMOVE:
9207 # remove the last disk
9208 device = instance.disks.pop()
9209 device_idx = len(instance.disks)
9210 for node, disk in device.ComputeNodeTree(instance.primary_node):
9211 self.cfg.SetDiskID(disk, node)
9212 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9214 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9215 " continuing anyway", device_idx, node, msg)
9216 result.append(("disk/%d" % device_idx, "remove"))
9217 elif disk_op == constants.DDM_ADD:
9219 if instance.disk_template == constants.DT_FILE:
9220 file_driver, file_path = instance.disks[0].logical_id
9221 file_path = os.path.dirname(file_path)
9223 file_driver = file_path = None
9224 disk_idx_base = len(instance.disks)
9225 new_disk = _GenerateDiskTemplate(self,
9226 instance.disk_template,
9227 instance.name, instance.primary_node,
9228 instance.secondary_nodes,
9233 instance.disks.append(new_disk)
9234 info = _GetInstanceInfoText(instance)
9236 logging.info("Creating volume %s for instance %s",
9237 new_disk.iv_name, instance.name)
9238 # Note: this needs to be kept in sync with _CreateDisks
9240 for node in instance.all_nodes:
9241 f_create = node == instance.primary_node
9243 _CreateBlockDev(self, node, instance, new_disk,
9244 f_create, info, f_create)
9245 except errors.OpExecError, err:
9246 self.LogWarning("Failed to create volume %s (%s) on"
9248 new_disk.iv_name, new_disk, node, err)
9249 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9250 (new_disk.size, new_disk.mode)))
9252 # change a given disk
9253 instance.disks[disk_op].mode = disk_dict['mode']
9254 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9256 if self.op.disk_template:
9257 r_shut = _ShutdownInstanceDisks(self, instance)
9259 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9260 " proceed with disk template conversion")
9261 mode = (instance.disk_template, self.op.disk_template)
9263 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9265 self.cfg.ReleaseDRBDMinors(instance.name)
9267 result.append(("disk_template", self.op.disk_template))
9270 for nic_op, nic_dict in self.op.nics:
9271 if nic_op == constants.DDM_REMOVE:
9272 # remove the last nic
9273 del instance.nics[-1]
9274 result.append(("nic.%d" % len(instance.nics), "remove"))
9275 elif nic_op == constants.DDM_ADD:
9276 # mac and bridge should be set by now
9277 mac = nic_dict['mac']
9278 ip = nic_dict.get('ip', None)
9279 nicparams = self.nic_pinst[constants.DDM_ADD]
9280 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9281 instance.nics.append(new_nic)
9282 result.append(("nic.%d" % (len(instance.nics) - 1),
9283 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9284 (new_nic.mac, new_nic.ip,
9285 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9286 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9289 for key in 'mac', 'ip':
9291 setattr(instance.nics[nic_op], key, nic_dict[key])
9292 if nic_op in self.nic_pinst:
9293 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9294 for key, val in nic_dict.iteritems():
9295 result.append(("nic.%s/%d" % (key, nic_op), val))
9298 if self.op.hvparams:
9299 instance.hvparams = self.hv_inst
9300 for key, val in self.op.hvparams.iteritems():
9301 result.append(("hv/%s" % key, val))
9304 if self.op.beparams:
9305 instance.beparams = self.be_inst
9306 for key, val in self.op.beparams.iteritems():
9307 result.append(("be/%s" % key, val))
9311 instance.os = self.op.os_name
9314 if self.op.osparams:
9315 instance.osparams = self.os_inst
9316 for key, val in self.op.osparams.iteritems():
9317 result.append(("os/%s" % key, val))
9319 self.cfg.Update(instance, feedback_fn)
9323 _DISK_CONVERSIONS = {
9324 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9325 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9329 class LUQueryExports(NoHooksLU):
9330 """Query the exports list
9334 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9335 ("use_locking", False, _TBool),
9339 def ExpandNames(self):
9340 self.needed_locks = {}
9341 self.share_locks[locking.LEVEL_NODE] = 1
9342 if not self.op.nodes:
9343 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9345 self.needed_locks[locking.LEVEL_NODE] = \
9346 _GetWantedNodes(self, self.op.nodes)
9348 def Exec(self, feedback_fn):
9349 """Compute the list of all the exported system images.
9352 @return: a dictionary with the structure node->(export-list)
9353 where export-list is a list of the instances exported on
9357 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9358 rpcresult = self.rpc.call_export_list(self.nodes)
9360 for node in rpcresult:
9361 if rpcresult[node].fail_msg:
9362 result[node] = False
9364 result[node] = rpcresult[node].payload
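# Example of the structure computed here (hypothetical contents):
#
#   {"node1.example.com": ["instance1.example.com"],
#    "node2.example.com": False}
#
# where False marks a node whose export list could not be fetched.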
9369 class LUPrepareExport(NoHooksLU):
9370 """Prepares an instance for an export and returns useful information.
9375 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9379 def ExpandNames(self):
9380 self._ExpandAndLockInstance()
9382 def CheckPrereq(self):
9383 """Check prerequisites.
9386 instance_name = self.op.instance_name
9388 self.instance = self.cfg.GetInstanceInfo(instance_name)
9389 assert self.instance is not None, \
9390 "Cannot retrieve locked instance %s" % self.op.instance_name
9391 _CheckNodeOnline(self, self.instance.primary_node)
9393 self._cds = _GetClusterDomainSecret()
9395 def Exec(self, feedback_fn):
9396 """Prepares an instance for an export.
9399 instance = self.instance
9401 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9402 salt = utils.GenerateSecret(8)
9404 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9405 result = self.rpc.call_x509_cert_create(instance.primary_node,
9406 constants.RIE_CERT_VALIDITY)
9407 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9409 (name, cert_pem) = result.payload
9411 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9415 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9416 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9418 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9424 class LUExportInstance(LogicalUnit):
9425 """Export an instance to an image in the cluster.
9428 HPATH = "instance-export"
9429 HTYPE = constants.HTYPE_INSTANCE
9432 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9433 ("shutdown", True, _TBool),
9435 ("remove_instance", False, _TBool),
9436 ("ignore_remove_failures", False, _TBool),
9437 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9438 ("x509_key_name", None, _TOr(_TList, _TNone)),
9439 ("destination_x509_ca", None, _TMaybeString),
9443 def CheckArguments(self):
9444 """Check the arguments.
9447 self.x509_key_name = self.op.x509_key_name
9448 self.dest_x509_ca_pem = self.op.destination_x509_ca
9450 if self.op.remove_instance and not self.op.shutdown:
9451 raise errors.OpPrereqError("Can not remove instance without shutting it"
9454 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9455 if not self.x509_key_name:
9456 raise errors.OpPrereqError("Missing X509 key name for encryption",
9459 if not self.dest_x509_ca_pem:
9460 raise errors.OpPrereqError("Missing destination X509 CA",
9463 def ExpandNames(self):
9464 self._ExpandAndLockInstance()
9466 # Lock all nodes for local exports
9467 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9468 # FIXME: lock only instance primary and destination node
9470 # Sad but true, for now we have to lock all nodes, as we don't know where
9471 # the previous export might be, and in this LU we search for it and
9472 # remove it from its current node. In the future we could fix this by:
9473 # - making a tasklet to search (share-lock all), then create the
9474 # new one, then one to remove, after
9475 # - removing the removal operation altogether
9476 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults
    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
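
  # Illustrative example (hypothetical values): a fully successful export of
  # a two-disk instance would return
  #   (True, [True, True])
  # i.e. finalization succeeded and every disk was exported.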


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but
    # we don't need to lock the instance itself, as nothing will happen to it
    # (and we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
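
  # Illustrative example (hypothetical names): searching for the pattern
  # "^web" could return
  #   [("/instances/inst1.example.com", "webserver"),
  #    ("/cluster", "web-pool")]
  # i.e. (path, tag) pairs for every matching tag in the cluster.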


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
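
  # Illustrative example (hypothetical parameters): an opcode with
  # duration=1.5, on_master=True and repeat=3 would sleep three times for
  # 1.5 seconds each on the master, logging "Test delay iteration 0/2"
  # through "2/2" along the way.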


class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  _OP_PARAMS = [
    ("notify_waitlock", False, _TBool),
    ("notify_exec", False, _TBool),
    ("log_messages", _EmptyList, _TListOf(_TString)),
    ("fail", False, _TBool),
    ]
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # dynamic (queried) node data, including memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
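
  # Illustrative sketch of the resulting structure (hypothetical values):
  #   self.in_data = {
  #     "version": ...,
  #     "cluster_name": "cluster.example.com",
  #     "nodes": {"node1.example.com": {...}, ...},
  #     "instances": {"inst1.example.com": {...}, ...},
  #   }
  # _BuildInputData later adds the mode-specific "request" key before the
  # whole dictionary is serialized for the external allocator script.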

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
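
  # Illustrative example (hypothetical reply): a well-formed allocator answer
  # would deserialize to something like
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node2.example.com"]}
  # after which ial.success, ial.info and ial.result are set accordingly.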


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("nics", _NoDefault, _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _NoDefault, _TOr(_TNone, _TList)),
    ("hypervisor", None, _TMaybeString),
    ("allocator", None, _TMaybeString),
    ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
    ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
    ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
    ("os", None, _TMaybeString),
    ("disk_template", None, _TMaybeString),
    ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    tested.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result