4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 import ganeti.masterd.instance # pylint: disable-msg=W0611
60 # Modifiable default values; need to define these here before the
64 """Returns an empty list.
71 """Returns an empty dict.
77 #: The without-default default value
81 #: The no-type (value too complex to check in the type system)
87 """Checks if the given value is not None.
90 return val is not None
94 """Checks if the given value is None.
101 """Checks if the given value is a boolean.
104 return isinstance(val, bool)
108 """Checks if the given value is an integer.
111 return isinstance(val, int)
115 """Checks if the given value is a float.
118 return isinstance(val, float)
122 """Checks if the given value is a string.
125 return isinstance(val, basestring)
129 """Checks if a given value evaluates to a boolean True value.
135 def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list.
139 return lambda val: val in target_list
144 """Checks if the given value is a list.
147 return isinstance(val, list)
151 """Checks if the given value is a dictionary.
154 return isinstance(val, dict)
157 def _TIsLength(size):
158 """Check is the given container is of the given size.
161 return lambda container: len(container) == size
166 """Combine multiple functions using an AND operation.
170 return compat.all(t(val) for t in args)
175 """Combine multiple functions using an AND operation.
179 return compat.any(t(val) for t in args)
184 """Checks that a modified version of the argument passes the given test.
187 return lambda val: test(fn(val))
192 #: a non-empty string
193 _TNonEmptyString = _TAnd(_TString, _TTrue)
196 #: a maybe non-empty string
197 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
200 #: a maybe boolean (bool or none)
201 _TMaybeBool = _TOr(_TBool, _TNone)
204 #: a positive integer
205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
207 #: a strictly positive integer
208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
211 def _TListOf(my_type):
212 """Checks if a given value is a list with all elements of the same type.
216 lambda lst: compat.all(my_type(v) for v in lst))
219 def _TDictOf(key_type, val_type):
220 """Checks a dict type for the type of its key/values.
224 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225 and compat.all(val_type(v)
226 for v in my_dict.values())))
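# Illustrative sketch (not used by any LU in this excerpt): the combinators
# above compose into arbitrary checks, e.g. a dict mapping non-empty strings
# to lists of non-negative integers:
#   _TDictOf(_TNonEmptyString, _TListOf(_TPositiveInt))
# accepts {"a": [0, 1]} but rejects {"": [1]} and {"a": [-1]}.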
229 # Common opcode attributes
231 #: output fields for a query operation
232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
235 #: the shutdown timeout
236 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
239 #: the force parameter
240 _PForce = ("force", False, _TBool)
242 #: a required instance name (for single-instance LUs)
243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
246 #: a required node name (for single-node LUs)
247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
249 #: the migration type (live/non-live)
250 _PMigrationMode = ("mode", None, _TOr(_TNone,
251 _TElemOf(constants.HT_MIGRATION_MODES)))
253 #: the obsolete 'live' mode (boolean)
254 _PMigrationLive = ("live", None, _TMaybeBool)
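# Illustrative sketch of how a (hypothetical) LU combines the common
# parameter definitions above into its opcode parameter list:
#   class LUExampleOperation(LogicalUnit):
#     _OP_PARAMS = [
#       _PInstanceName,
#       _PForce,
#       ("ignore_failures", False, _TBool),
#     ]
# Each entry is (attribute name, default value, type test), as consumed by
# LogicalUnit.__init__ below.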
258 class LogicalUnit(object):
259 """Logical Unit base class.
261 Subclasses must follow these rules:
262 - implement ExpandNames
263 - implement CheckPrereq (except when tasklets are used)
264 - implement Exec (except when tasklets are used)
265 - implement BuildHooksEnv
266 - redefine HPATH and HTYPE
267 - optionally redefine their run requirements:
268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
270 Note that all commands require root permissions.
272 @ivar dry_run_result: the value (if any) that will be returned to the caller
273 in dry-run mode (signalled by opcode dry_run parameter)
274 @cvar _OP_PARAMS: a list of opcode attributes, the default values
275 they should get if not already defined, and the types they must match
283 def __init__(self, processor, op, context, rpc):
284 """Constructor for LogicalUnit.
286 This needs to be overridden in derived classes in order to check op
290 self.proc = processor
292 self.cfg = context.cfg
293 self.context = context
295 # Dicts used to declare locking needs to mcpu
296 self.needed_locks = None
297 self.acquired_locks = {}
298 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
300 self.remove_locks = {}
301 # Used to force good behavior when calling helper functions
302 self.recalculate_locks = {}
305 self.Log = processor.Log # pylint: disable-msg=C0103
306 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
307 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
308 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
309 # support for dry-run
310 self.dry_run_result = None
311 # support for generic debug attribute
312 if (not hasattr(self.op, "debug_level") or
313 not isinstance(self.op.debug_level, int)):
314 self.op.debug_level = 0
319 # The new kind-of-type-system
320 op_id = self.op.OP_ID
321 for attr_name, aval, test in self._OP_PARAMS:
322 if not hasattr(op, attr_name):
323 if aval == _NoDefault:
324 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
325 (op_id, attr_name), errors.ECODE_INVAL)
331 setattr(self.op, attr_name, dval)
332 attr_val = getattr(op, attr_name)
336 if not callable(test):
337 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
338 " given type is not a proper type (%s)" %
339 (op_id, attr_name, test))
340 if not test(attr_val):
341 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
342 self.op.OP_ID, attr_name, type(attr_val), attr_val)
343 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
344 (op_id, attr_name), errors.ECODE_INVAL)
346 self.CheckArguments()
349 """Returns the SshRunner object
353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
356 ssh = property(fget=__GetSSH)
358 def CheckArguments(self):
359 """Check syntactic validity for the opcode arguments.
361 This method is for doing a simple syntactic check and ensuring the
362 validity of opcode parameters, without any cluster-related
363 checks. While the same can be accomplished in ExpandNames and/or
364 CheckPrereq, doing these separately is better because:
366 - ExpandNames is left as a purely lock-related function
367 - CheckPrereq is run after we have acquired locks (and possible
370 The function is allowed to change the self.op attribute so that
371 later methods no longer need to worry about missing parameters.
376 def ExpandNames(self):
377 """Expand names for this LU.
379 This method is called before starting to execute the opcode, and it should
380 update all the parameters of the opcode to their canonical form (e.g. a
381 short node name must be fully expanded after this method has successfully
382 completed). This way locking, hooks, logging, etc. can work correctly.
384 LUs which implement this method must also populate the self.needed_locks
385 member, as a dict with lock levels as keys, and a list of needed lock names
388 - use an empty dict if you don't need any lock
389 - if you don't need any lock at a particular level omit that level
390 - don't put anything for the BGL level
391 - if you want all locks at a level use locking.ALL_SET as a value
393 If you need to share locks (rather than acquire them exclusively) at one
394 level you can modify self.share_locks, setting a true value (usually 1) for
395 that level. By default locks are not shared.
397 This function can also define a list of tasklets, which then will be
398 executed in order instead of the usual LU-level CheckPrereq and Exec
399 functions, if those are not defined by the LU.
403 # Acquire all nodes and one instance
404 self.needed_locks = {
405 locking.LEVEL_NODE: locking.ALL_SET,
406 locking.LEVEL_INSTANCE: ['instance1.example.com'],
408 # Acquire just two nodes
409 self.needed_locks = {
410 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
413 self.needed_locks = {} # No, you can't leave it to the default value None
416 # The implementation of this method is mandatory only if the new LU is
417 # concurrent, so that old LUs don't need to be changed all at the same
420 self.needed_locks = {} # Exclusive LUs don't need locks.
422 raise NotImplementedError
424 def DeclareLocks(self, level):
425 """Declare LU locking needs for a level
427 While most LUs can just declare their locking needs at ExpandNames time,
428 sometimes there's the need to calculate some locks after having acquired
429 the ones before. This function is called just before acquiring locks at a
430 particular level, but after acquiring the ones at lower levels, and permits
431 such calculations. It can be used to modify self.needed_locks, and by
432 default it does nothing.
434 This function is only called if you have something already set in
435 self.needed_locks for the level.
437 @param level: Locking level which is going to be locked
438 @type level: member of ganeti.locking.LEVELS
442 def CheckPrereq(self):
443 """Check prerequisites for this LU.
445 This method should check that the prerequisites for the execution
446 of this LU are fulfilled. It can do internode communication, but
447 it should be idempotent - no cluster or system changes are
450 The method should raise errors.OpPrereqError in case something is
451 not fulfilled. Its return value is ignored.
453 This method should also update all the parameters of the opcode to
454 their canonical form if it hasn't been done by ExpandNames before.
457 if self.tasklets is not None:
458 for (idx, tl) in enumerate(self.tasklets):
459 logging.debug("Checking prerequisites for tasklet %s/%s",
460 idx + 1, len(self.tasklets))
465 def Exec(self, feedback_fn):
468 This method should implement the actual work. It should raise
469 errors.OpExecError for failures that are somewhat dealt with in
473 if self.tasklets is not None:
474 for (idx, tl) in enumerate(self.tasklets):
475 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
478 raise NotImplementedError
480 def BuildHooksEnv(self):
481 """Build hooks environment for this LU.
483 This method should return a three-element tuple consisting of: a dict
484 containing the environment that will be used for running the
485 specific hook for this LU, a list of node names on which the hook
486 should run before the execution, and a list of node names on which
487 the hook should run after the execution.
489 The keys of the dict must not have the 'GANETI_' prefix, as this will
490 be handled by the hooks runner. Also note that additional keys will be
491 added by the hooks runner. If the LU doesn't define any
492 environment, an empty dict (and not None) should be returned.
494 No nodes should be returned as an empty list (and not None).
496 Note that if the HPATH for a LU class is None, this function will
500 raise NotImplementedError
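# Illustrative shape of a BuildHooksEnv return value (hypothetical names):
#   env = {"OP_TARGET": "instance1.example.com"}
#   return env, ["node1.example.com"], ["node1.example.com", "node2.example.com"]
# i.e. (environment dict, nodes running the pre-hook, nodes running the
# post-hook); see LUPostInitCluster.BuildHooksEnv below for a real example.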
502 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503 """Notify the LU about the results of its hooks.
505 This method is called every time a hooks phase is executed, and notifies
506 the Logical Unit about the hooks' result. The LU can then use it to alter
507 its result based on the hooks. By default the method does nothing and the
508 previous result is passed back unchanged but any LU can define it if it
509 wants to use the local cluster hook-scripts somehow.
511 @param phase: one of L{constants.HOOKS_PHASE_POST} or
512 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
513 @param hook_results: the results of the multi-node hooks rpc call
514 @param feedback_fn: function used to send feedback back to the caller
515 @param lu_result: the previous Exec result this LU had, or None
517 @return: the new Exec result, based on the previous result
521 # API must be kept, thus we ignore the unused-argument and
522 # could-be-a-function warnings
523 # pylint: disable-msg=W0613,R0201
526 def _ExpandAndLockInstance(self):
527 """Helper function to expand and lock an instance.
529 Many LUs that work on an instance take its name in self.op.instance_name
530 and need to expand it and then declare the expanded name for locking. This
531 function does it, and then updates self.op.instance_name to the expanded
532 name. It also initializes needed_locks as a dict, if this hasn't been done
536 if self.needed_locks is None:
537 self.needed_locks = {}
539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540 "_ExpandAndLockInstance called with instance-level locks set"
541 self.op.instance_name = _ExpandInstanceName(self.cfg,
542 self.op.instance_name)
543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
545 def _LockInstancesNodes(self, primary_only=False):
546 """Helper function to declare instances' nodes for locking.
548 This function should be called after locking one or more instances to lock
549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550 with all primary or secondary nodes for instances already locked and
551 present in self.needed_locks[locking.LEVEL_INSTANCE].
553 It should be called from DeclareLocks, and for safety only works if
554 self.recalculate_locks[locking.LEVEL_NODE] is set.
556 In the future it may grow parameters to just lock some instance's nodes, or
557 to just lock primaries or secondary nodes, if needed.
559 It should be called in DeclareLocks in a way similar to::
561 if level == locking.LEVEL_NODE:
562 self._LockInstancesNodes()
564 @type primary_only: boolean
565 @param primary_only: only lock primary nodes of locked instances
568 assert locking.LEVEL_NODE in self.recalculate_locks, \
569 "_LockInstancesNodes helper function called with no nodes to recalculate"
571 # TODO: check if we've really been called with the instance locks held
573 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
574 # future we might want to have different behaviors depending on the value
575 # of self.recalculate_locks[locking.LEVEL_NODE]
577 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578 instance = self.context.cfg.GetInstanceInfo(instance_name)
579 wanted_nodes.append(instance.primary_node)
581 wanted_nodes.extend(instance.secondary_nodes)
583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
588 del self.recalculate_locks[locking.LEVEL_NODE]
591 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592 """Simple LU which runs no hooks.
594 This LU is intended as a parent for other LogicalUnits which will
595 run no hooks, in order to reduce duplicate code.
601 def BuildHooksEnv(self):
602 """Empty BuildHooksEnv for NoHooksLu.
604 This just raises an error.
607 assert False, "BuildHooksEnv called for NoHooksLUs"
611 """Tasklet base class.
613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614 they can mix legacy code with tasklets. Locking needs to be done in the LU;
615 tasklets know nothing about locks.
617 Subclasses must follow these rules:
618 - Implement CheckPrereq
622 def __init__(self, lu):
629 def CheckPrereq(self):
630 """Check prerequisites for this tasklets.
632 This method should check whether the prerequisites for the execution of
633 this tasklet are fulfilled. It can do internode communication, but it
634 should be idempotent - no cluster or system changes are allowed.
636 The method should raise errors.OpPrereqError in case something is not
637 fulfilled. Its return value is ignored.
639 This method should also update all parameters to their canonical form if it
640 hasn't been done before.
645 def Exec(self, feedback_fn):
646 """Execute the tasklet.
648 This method should implement the actual work. It should raise
649 errors.OpExecError for failures that are somewhat dealt with in code, or
653 raise NotImplementedError
656 def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names.
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
662 @param nodes: list of node names or None for all nodes
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
669 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
670 " non-empty list of nodes whose name is to be expanded.")
672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673 return utils.NiceSort(wanted)
676 def _GetWantedInstances(lu, instances):
677 """Returns list of checked and expanded instance names.
679 @type lu: L{LogicalUnit}
680 @param lu: the logical unit on whose behalf we execute
681 @type instances: list
682 @param instances: list of instance names or None for all instances
684 @return: the list of instances, sorted
685 @raise errors.OpPrereqError: if the instances parameter is wrong type
686 @raise errors.OpPrereqError: if any of the passed instances is not found
690 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
692 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
696 def _GetUpdatedParams(old_params, update_dict,
697 use_default=True, use_none=False):
698 """Return the new version of a parameter dictionary.
700 @type old_params: dict
701 @param old_params: old parameters
702 @type update_dict: dict
703 @param update_dict: dict containing new parameter values, or
704 constants.VALUE_DEFAULT to reset the parameter to its default
706 @type use_default: boolean
707 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
708 values as 'to be deleted' values
709 @type use_none: boolean
710 @param use_none: whether to recognise C{None} values as 'to be
713 @return: the new parameter dictionary
716 params_copy = copy.deepcopy(old_params)
717 for key, val in update_dict.iteritems():
718 if ((use_default and val == constants.VALUE_DEFAULT) or
719 (use_none and val is None)):
725 params_copy[key] = val
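# Illustrative example of _GetUpdatedParams (made-up values):
#   _GetUpdatedParams({"memory": 512, "vcpus": 2},
#                     {"memory": constants.VALUE_DEFAULT, "vcpus": 4})
# returns {"vcpus": 4}: "memory" is dropped so it reverts to its cluster-level
# default, while "vcpus" is overridden.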
729 def _CheckOutputFields(static, dynamic, selected):
730 """Checks whether all selected fields are valid.
732 @type static: L{utils.FieldSet}
733 @param static: static fields set
734 @type dynamic: L{utils.FieldSet}
735 @param dynamic: dynamic fields set
742 delta = f.NonMatching(selected)
744 raise errors.OpPrereqError("Unknown output fields selected: %s"
745 % ",".join(delta), errors.ECODE_INVAL)
748 def _CheckGlobalHvParams(params):
749 """Validates that given hypervisor params are not global ones.
751 This will ensure that instances don't get customised versions of
755 used_globals = constants.HVC_GLOBALS.intersection(params)
757 msg = ("The following hypervisor parameters are global and cannot"
758 " be customized at instance level, please modify them at"
759 " cluster level: %s" % utils.CommaJoin(used_globals))
760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
763 def _CheckNodeOnline(lu, node):
764 """Ensure that a given node is online.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @raise errors.OpPrereqError: if the node is offline
771 if lu.cfg.GetNodeInfo(node).offline:
772 raise errors.OpPrereqError("Can't use offline node %s" % node,
776 def _CheckNodeNotDrained(lu, node):
777 """Ensure that a given node is not drained.
779 @param lu: the LU on behalf of which we make the check
780 @param node: the node to check
781 @raise errors.OpPrereqError: if the node is drained
784 if lu.cfg.GetNodeInfo(node).drained:
785 raise errors.OpPrereqError("Can't use drained node %s" % node,
789 def _CheckNodeHasOS(lu, node, os_name, force_variant):
790 """Ensure that a node supports a given OS.
792 @param lu: the LU on behalf of which we make the check
793 @param node: the node to check
794 @param os_name: the OS to query about
795 @param force_variant: whether to ignore variant errors
796 @raise errors.OpPrereqError: if the node is not supporting the OS
799 result = lu.rpc.call_os_get(node, os_name)
800 result.Raise("OS '%s' not in supported OS list for node %s" %
802 prereq=True, ecode=errors.ECODE_INVAL)
803 if not force_variant:
804 _CheckOSVariant(result.payload, os_name)
807 def _RequireFileStorage():
808 """Checks that file storage is enabled.
810 @raise errors.OpPrereqError: when file storage is disabled
813 if not constants.ENABLE_FILE_STORAGE:
814 raise errors.OpPrereqError("File storage disabled at configure time",
818 def _CheckDiskTemplate(template):
819 """Ensure a given disk template is valid.
822 if template not in constants.DISK_TEMPLATES:
823 msg = ("Invalid disk template name '%s', valid templates are: %s" %
824 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
826 if template == constants.DT_FILE:
827 _RequireFileStorage()
831 def _CheckStorageType(storage_type):
832 """Ensure a given storage type is valid.
835 if storage_type not in constants.VALID_STORAGE_TYPES:
836 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
838 if storage_type == constants.ST_FILE:
839 _RequireFileStorage()
843 def _GetClusterDomainSecret():
844 """Reads the cluster domain secret.
847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
851 def _CheckInstanceDown(lu, instance, reason):
852 """Ensure that an instance is not running."""
853 if instance.admin_up:
854 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
855 (instance.name, reason), errors.ECODE_STATE)
857 pnode = instance.primary_node
858 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
859 ins_l.Raise("Can't contact node %s for instance information" % pnode,
860 prereq=True, ecode=errors.ECODE_ENVIRON)
862 if instance.name in ins_l.payload:
863 raise errors.OpPrereqError("Instance %s is running, %s" %
864 (instance.name, reason), errors.ECODE_STATE)
867 def _ExpandItemName(fn, name, kind):
868 """Expand an item name.
870 @param fn: the function to use for expansion
871 @param name: requested item name
872 @param kind: text description ('Node' or 'Instance')
873 @return: the resolved (full) name
874 @raise errors.OpPrereqError: if the item is not found
878 if full_name is None:
879 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
884 def _ExpandNodeName(cfg, name):
885 """Wrapper over L{_ExpandItemName} for nodes."""
886 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
889 def _ExpandInstanceName(cfg, name):
890 """Wrapper over L{_ExpandItemName} for instance."""
891 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
894 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
895 memory, vcpus, nics, disk_template, disks,
896 bep, hvp, hypervisor_name):
897 """Builds instance related env variables for hooks
899 This builds the hook environment from individual variables.
902 @param name: the name of the instance
903 @type primary_node: string
904 @param primary_node: the name of the instance's primary node
905 @type secondary_nodes: list
906 @param secondary_nodes: list of secondary nodes as strings
907 @type os_type: string
908 @param os_type: the name of the instance's OS
909 @type status: boolean
910 @param status: the should_run status of the instance
912 @param memory: the memory size of the instance
914 @param vcpus: the count of VCPUs the instance has
916 @param nics: list of tuples (ip, mac, mode, link) representing
917 the NICs the instance has
918 @type disk_template: string
919 @param disk_template: the disk template of the instance
921 @param disks: the list of (size, mode) pairs
923 @param bep: the backend parameters for the instance
925 @param hvp: the hypervisor parameters for the instance
926 @type hypervisor_name: string
927 @param hypervisor_name: the hypervisor for the instance
929 @return: the hook environment for this instance
938 "INSTANCE_NAME": name,
939 "INSTANCE_PRIMARY": primary_node,
940 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
941 "INSTANCE_OS_TYPE": os_type,
942 "INSTANCE_STATUS": str_status,
943 "INSTANCE_MEMORY": memory,
944 "INSTANCE_VCPUS": vcpus,
945 "INSTANCE_DISK_TEMPLATE": disk_template,
946 "INSTANCE_HYPERVISOR": hypervisor_name,
950 nic_count = len(nics)
951 for idx, (ip, mac, mode, link) in enumerate(nics):
954 env["INSTANCE_NIC%d_IP" % idx] = ip
955 env["INSTANCE_NIC%d_MAC" % idx] = mac
956 env["INSTANCE_NIC%d_MODE" % idx] = mode
957 env["INSTANCE_NIC%d_LINK" % idx] = link
958 if mode == constants.NIC_MODE_BRIDGED:
959 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
963 env["INSTANCE_NIC_COUNT"] = nic_count
966 disk_count = len(disks)
967 for idx, (size, mode) in enumerate(disks):
968 env["INSTANCE_DISK%d_SIZE" % idx] = size
969 env["INSTANCE_DISK%d_MODE" % idx] = mode
973 env["INSTANCE_DISK_COUNT"] = disk_count
975 for source, kind in [(bep, "BE"), (hvp, "HV")]:
976 for key, value in source.items():
977 env["INSTANCE_%s_%s" % (kind, key)] = value
982 def _NICListToTuple(lu, nics):
983 """Build a list of nic information tuples.
985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986 value in LUQueryInstanceData.
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type nics: list of L{objects.NIC}
991 @param nics: list of nics to convert to hooks tuples
995 cluster = lu.cfg.GetClusterInfo()
999 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000 mode = filled_params[constants.NIC_MODE]
1001 link = filled_params[constants.NIC_LINK]
1002 hooks_nics.append((ip, mac, mode, link))
1006 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007 """Builds instance related env variables for hooks from an object.
1009 @type lu: L{LogicalUnit}
1010 @param lu: the logical unit on whose behalf we execute
1011 @type instance: L{objects.Instance}
1012 @param instance: the instance for which we should build the
1014 @type override: dict
1015 @param override: dictionary with key/values that will override
1018 @return: the hook environment dictionary
1021 cluster = lu.cfg.GetClusterInfo()
1022 bep = cluster.FillBE(instance)
1023 hvp = cluster.FillHV(instance)
1025 'name': instance.name,
1026 'primary_node': instance.primary_node,
1027 'secondary_nodes': instance.secondary_nodes,
1028 'os_type': instance.os,
1029 'status': instance.admin_up,
1030 'memory': bep[constants.BE_MEMORY],
1031 'vcpus': bep[constants.BE_VCPUS],
1032 'nics': _NICListToTuple(lu, instance.nics),
1033 'disk_template': instance.disk_template,
1034 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1037 'hypervisor_name': instance.hypervisor,
1040 args.update(override)
1041 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1044 def _AdjustCandidatePool(lu, exceptions):
1045 """Adjust the candidate pool after node operations.
1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1050 lu.LogInfo("Promoted nodes to master candidate role: %s",
1051 utils.CommaJoin(node.name for node in mod_list))
1052 for name in mod_list:
1053 lu.context.ReaddNode(name)
1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1056 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1060 def _DecideSelfPromotion(lu, exceptions=None):
1061 """Decide whether I should promote myself as a master candidate.
1064 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066 # the new node will increase mc_max with one, so:
1067 mc_should = min(mc_should + 1, cp_size)
1068 return mc_now < mc_should
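# Worked example (hypothetical numbers): with candidate_pool_size=10 and
# GetMasterCandidateStats returning mc_now=3, mc_should=3, the new node bumps
# mc_should to min(3 + 1, 10) = 4, so 3 < 4 and the node promotes itself.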
1071 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072 """Check that the brigdes needed by a list of nics exist.
1075 cluster = lu.cfg.GetClusterInfo()
1076 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077 brlist = [params[constants.NIC_LINK] for params in paramslist
1078 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1080 result = lu.rpc.call_bridges_exist(target_node, brlist)
1081 result.Raise("Error checking bridges on destination node '%s'" %
1082 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1085 def _CheckInstanceBridgesExist(lu, instance, node=None):
1086 """Check that the brigdes needed by an instance exist.
1090 node = instance.primary_node
1091 _CheckNicsBridgesExist(lu, instance.nics, node)
1094 def _CheckOSVariant(os_obj, name):
1095 """Check whether an OS name conforms to the os variants specification.
1097 @type os_obj: L{objects.OS}
1098 @param os_obj: OS object to check
1100 @param name: OS name passed by the user, to check for validity
1103 if not os_obj.supported_variants:
1105 variant = objects.OS.GetVariant(name)
1107 raise errors.OpPrereqError("OS name must include a variant",
1110 if variant not in os_obj.supported_variants:
1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1114 def _GetNodeInstancesInner(cfg, fn):
1115 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1118 def _GetNodeInstances(cfg, node_name):
1119 """Returns a list of all primary and secondary instances on a node.
1123 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1126 def _GetNodePrimaryInstances(cfg, node_name):
1127 """Returns primary instances on a node.
1130 return _GetNodeInstancesInner(cfg,
1131 lambda inst: node_name == inst.primary_node)
1134 def _GetNodeSecondaryInstances(cfg, node_name):
1135 """Returns secondary instances on a node.
1138 return _GetNodeInstancesInner(cfg,
1139 lambda inst: node_name in inst.secondary_nodes)
1142 def _GetStorageTypeArgs(cfg, storage_type):
1143 """Returns the arguments for a storage type.
1146 # Special case for file storage
1147 if storage_type == constants.ST_FILE:
1148 # storage.FileStorage wants a list of storage directories
1149 return [[cfg.GetFileStorageDir()]]
1154 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1157 for dev in instance.disks:
1158 cfg.SetDiskID(dev, node_name)
1160 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161 result.Raise("Failed to get disk status from node %s" % node_name,
1162 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164 for idx, bdev_status in enumerate(result.payload):
1165 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1171 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172 """Check the sanity of iallocator and node arguments and use the
1173 cluster-wide iallocator if appropriate.
1175 Check that at most one of (iallocator, node) is specified. If none is
1176 specified, then the LU's opcode's iallocator slot is filled with the
1177 cluster-wide default iallocator.
1179 @type iallocator_slot: string
1180 @param iallocator_slot: the name of the opcode iallocator slot
1181 @type node_slot: string
1182 @param node_slot: the name of the opcode target node slot
1185 node = getattr(lu.op, node_slot, None)
1186 iallocator = getattr(lu.op, iallocator_slot, None)
1188 if node is not None and iallocator is not None:
1189 raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1191 elif node is None and iallocator is None:
1192 default_iallocator = lu.cfg.GetDefaultIAllocator()
1193 if default_iallocator:
1194 setattr(lu.op, iallocator_slot, default_iallocator)
1196 raise errors.OpPrereqError("No iallocator or node given and no"
1197 " cluster-wide default iallocator found."
1198 " Please specify either an iallocator or a"
1199 " node, or set a cluster-wide default"
1203 class LUPostInitCluster(LogicalUnit):
1204 """Logical unit for running hooks after cluster initialization.
1207 HPATH = "cluster-init"
1208 HTYPE = constants.HTYPE_CLUSTER
1210 def BuildHooksEnv(self):
1214 env = {"OP_TARGET": self.cfg.GetClusterName()}
1215 mn = self.cfg.GetMasterNode()
1216 return env, [], [mn]
1218 def Exec(self, feedback_fn):
1225 class LUDestroyCluster(LogicalUnit):
1226 """Logical unit for destroying the cluster.
1229 HPATH = "cluster-destroy"
1230 HTYPE = constants.HTYPE_CLUSTER
1232 def BuildHooksEnv(self):
1236 env = {"OP_TARGET": self.cfg.GetClusterName()}
1239 def CheckPrereq(self):
1240 """Check prerequisites.
1242 This checks whether the cluster is empty.
1244 Any errors are signaled by raising errors.OpPrereqError.
1247 master = self.cfg.GetMasterNode()
1249 nodelist = self.cfg.GetNodeList()
1250 if len(nodelist) != 1 or nodelist[0] != master:
1251 raise errors.OpPrereqError("There are still %d node(s) in"
1252 " this cluster." % (len(nodelist) - 1),
1254 instancelist = self.cfg.GetInstanceList()
1256 raise errors.OpPrereqError("There are still %d instance(s) in"
1257 " this cluster." % len(instancelist),
1260 def Exec(self, feedback_fn):
1261 """Destroys the cluster.
1264 master = self.cfg.GetMasterNode()
1266 # Run post hooks on master node before it's removed
1267 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1269 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1271 # pylint: disable-msg=W0702
1272 self.LogWarning("Errors occurred running hooks on %s" % master)
1274 result = self.rpc.call_node_stop_master(master, False)
1275 result.Raise("Could not disable the master role")
1280 def _VerifyCertificate(filename):
1281 """Verifies a certificate for LUVerifyCluster.
1283 @type filename: string
1284 @param filename: Path to PEM file
1288 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1289 utils.ReadFile(filename))
1290 except Exception, err: # pylint: disable-msg=W0703
1291 return (LUVerifyCluster.ETYPE_ERROR,
1292 "Failed to load X509 certificate %s: %s" % (filename, err))
1295 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1296 constants.SSL_CERT_EXPIRATION_ERROR)
1299 fnamemsg = "While verifying %s: %s" % (filename, msg)
1304 return (None, fnamemsg)
1305 elif errcode == utils.CERT_WARNING:
1306 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1307 elif errcode == utils.CERT_ERROR:
1308 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1310 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1313 class LUVerifyCluster(LogicalUnit):
1314 """Verifies the cluster status.
1317 HPATH = "cluster-verify"
1318 HTYPE = constants.HTYPE_CLUSTER
1320 ("skip_checks", _EmptyList,
1321 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1322 ("verbose", False, _TBool),
1323 ("error_codes", False, _TBool),
1324 ("debug_simulate_errors", False, _TBool),
1328 TCLUSTER = "cluster"
1330 TINSTANCE = "instance"
1332 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1333 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1334 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1335 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1336 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1337 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1339 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1340 ENODEDRBD = (TNODE, "ENODEDRBD")
1341 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1342 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1343 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1344 ENODEHV = (TNODE, "ENODEHV")
1345 ENODELVM = (TNODE, "ENODELVM")
1346 ENODEN1 = (TNODE, "ENODEN1")
1347 ENODENET = (TNODE, "ENODENET")
1348 ENODEOS = (TNODE, "ENODEOS")
1349 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1350 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1351 ENODERPC = (TNODE, "ENODERPC")
1352 ENODESSH = (TNODE, "ENODESSH")
1353 ENODEVERSION = (TNODE, "ENODEVERSION")
1354 ENODESETUP = (TNODE, "ENODESETUP")
1355 ENODETIME = (TNODE, "ENODETIME")
1357 ETYPE_FIELD = "code"
1358 ETYPE_ERROR = "ERROR"
1359 ETYPE_WARNING = "WARNING"
1361 class NodeImage(object):
1362 """A class representing the logical and physical status of a node.
1365 @ivar name: the node name to which this object refers
1366 @ivar volumes: a structure as returned from
1367 L{ganeti.backend.GetVolumeList} (runtime)
1368 @ivar instances: a list of running instances (runtime)
1369 @ivar pinst: list of configured primary instances (config)
1370 @ivar sinst: list of configured secondary instances (config)
1371 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1372 of this node (config)
1373 @ivar mfree: free memory, as reported by hypervisor (runtime)
1374 @ivar dfree: free disk, as reported by the node (runtime)
1375 @ivar offline: the offline status (config)
1376 @type rpc_fail: boolean
1377 @ivar rpc_fail: whether the RPC verify call failed (overall,
1378 not whether the individual keys were correct) (runtime)
1379 @type lvm_fail: boolean
1380 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1381 @type hyp_fail: boolean
1382 @ivar hyp_fail: whether the RPC call didn't return the instance list
1383 @type ghost: boolean
1384 @ivar ghost: whether this is a known node or not (config)
1385 @type os_fail: boolean
1386 @ivar os_fail: whether the RPC call didn't return valid OS data
1388 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1391 def __init__(self, offline=False, name=None):
1400 self.offline = offline
1401 self.rpc_fail = False
1402 self.lvm_fail = False
1403 self.hyp_fail = False
1405 self.os_fail = False
1408 def ExpandNames(self):
1409 self.needed_locks = {
1410 locking.LEVEL_NODE: locking.ALL_SET,
1411 locking.LEVEL_INSTANCE: locking.ALL_SET,
1413 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1415 def _Error(self, ecode, item, msg, *args, **kwargs):
1416 """Format an error message.
1418 Based on the opcode's error_codes parameter, either format a
1419 parseable error code, or a simpler error string.
1421 This must be called only from Exec and functions called from Exec.
1424 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1426 # first complete the msg
1429 # then format the whole message
1430 if self.op.error_codes:
1431 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1437 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1438 # and finally report it via the feedback_fn
1439 self._feedback_fn(" - %s" % msg)
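# Illustrative output of _Error (made-up values): with error_codes enabled a
# line like "ERROR:ENODELVM:node:node1.example.com:unable to check volume
# groups" is emitted; otherwise the simpler
# "ERROR: node node1.example.com: unable to check volume groups".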
1441 def _ErrorIf(self, cond, *args, **kwargs):
1442 """Log an error message if the passed condition is True.
1445 cond = bool(cond) or self.op.debug_simulate_errors
1447 self._Error(*args, **kwargs)
1448 # do not mark the operation as failed for WARN cases only
1449 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1450 self.bad = self.bad or cond
1452 def _VerifyNode(self, ninfo, nresult):
1453 """Perform some basic validation on data returned from a node.
1455 - check the result data structure is well formed and has all the
1457 - check ganeti version
1459 @type ninfo: L{objects.Node}
1460 @param ninfo: the node to check
1461 @param nresult: the results from the node
1463 @return: whether overall this call was successful (and we can expect
1464 reasonable values in the response)
1468 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1470 # main result, nresult should be a non-empty dict
1471 test = not nresult or not isinstance(nresult, dict)
1472 _ErrorIf(test, self.ENODERPC, node,
1473 "unable to verify node: no data returned")
1477 # compares ganeti version
1478 local_version = constants.PROTOCOL_VERSION
1479 remote_version = nresult.get("version", None)
1480 test = not (remote_version and
1481 isinstance(remote_version, (list, tuple)) and
1482 len(remote_version) == 2)
1483 _ErrorIf(test, self.ENODERPC, node,
1484 "connection to node returned invalid data")
1488 test = local_version != remote_version[0]
1489 _ErrorIf(test, self.ENODEVERSION, node,
1490 "incompatible protocol versions: master %s,"
1491 " node %s", local_version, remote_version[0])
1495 # node seems compatible, we can actually try to look into its results
1497 # full package version
1498 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1499 self.ENODEVERSION, node,
1500 "software version mismatch: master %s, node %s",
1501 constants.RELEASE_VERSION, remote_version[1],
1502 code=self.ETYPE_WARNING)
1504 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1505 if isinstance(hyp_result, dict):
1506 for hv_name, hv_result in hyp_result.iteritems():
1507 test = hv_result is not None
1508 _ErrorIf(test, self.ENODEHV, node,
1509 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1512 test = nresult.get(constants.NV_NODESETUP,
1513 ["Missing NODESETUP results"])
1514 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1519 def _VerifyNodeTime(self, ninfo, nresult,
1520 nvinfo_starttime, nvinfo_endtime):
1521 """Check the node time.
1523 @type ninfo: L{objects.Node}
1524 @param ninfo: the node to check
1525 @param nresult: the remote results for the node
1526 @param nvinfo_starttime: the start time of the RPC call
1527 @param nvinfo_endtime: the end time of the RPC call
1531 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1533 ntime = nresult.get(constants.NV_TIME, None)
1535 ntime_merged = utils.MergeTime(ntime)
1536 except (ValueError, TypeError):
1537 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1540 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1541 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1542 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1543 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1547 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1548 "Node time diverges by at least %s from master node time",
1551 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1552 """Check the node time.
1554 @type ninfo: L{objects.Node}
1555 @param ninfo: the node to check
1556 @param nresult: the remote results for the node
1557 @param vg_name: the configured VG name
1564 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1566 # checks vg existence and size > 20G
1567 vglist = nresult.get(constants.NV_VGLIST, None)
1569 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1571 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1572 constants.MIN_VG_SIZE)
1573 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1576 pvlist = nresult.get(constants.NV_PVLIST, None)
1577 test = pvlist is None
1578 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1580 # check that ':' is not present in PV names, since it's a
1581 # special character for lvcreate (denotes the range of PEs to
1583 for _, pvname, owner_vg in pvlist:
1584 test = ":" in pvname
1585 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1586 " '%s' of VG '%s'", pvname, owner_vg)
1588 def _VerifyNodeNetwork(self, ninfo, nresult):
1589 """Check the node time.
1591 @type ninfo: L{objects.Node}
1592 @param ninfo: the node to check
1593 @param nresult: the remote results for the node
1597 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1599 test = constants.NV_NODELIST not in nresult
1600 _ErrorIf(test, self.ENODESSH, node,
1601 "node hasn't returned node ssh connectivity data")
1603 if nresult[constants.NV_NODELIST]:
1604 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1605 _ErrorIf(True, self.ENODESSH, node,
1606 "ssh communication with node '%s': %s", a_node, a_msg)
1608 test = constants.NV_NODENETTEST not in nresult
1609 _ErrorIf(test, self.ENODENET, node,
1610 "node hasn't returned node tcp connectivity data")
1612 if nresult[constants.NV_NODENETTEST]:
1613 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1615 _ErrorIf(True, self.ENODENET, node,
1616 "tcp communication with node '%s': %s",
1617 anode, nresult[constants.NV_NODENETTEST][anode])
1619 test = constants.NV_MASTERIP not in nresult
1620 _ErrorIf(test, self.ENODENET, node,
1621 "node hasn't returned node master IP reachability data")
1623 if not nresult[constants.NV_MASTERIP]:
1624 if node == self.master_node:
1625 msg = "the master node cannot reach the master IP (not configured?)"
1627 msg = "cannot reach the master IP"
1628 _ErrorIf(True, self.ENODENET, node, msg)
1631 def _VerifyInstance(self, instance, instanceconfig, node_image):
1632 """Verify an instance.
1634 This function checks to see if the required block devices are
1635 available on the instance's node.
1638 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1639 node_current = instanceconfig.primary_node
1641 node_vol_should = {}
1642 instanceconfig.MapLVsByNode(node_vol_should)
1644 for node in node_vol_should:
1645 n_img = node_image[node]
1646 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1647 # ignore missing volumes on offline or broken nodes
1649 for volume in node_vol_should[node]:
1650 test = volume not in n_img.volumes
1651 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1652 "volume %s missing on node %s", volume, node)
1654 if instanceconfig.admin_up:
1655 pri_img = node_image[node_current]
1656 test = instance not in pri_img.instances and not pri_img.offline
1657 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1658 "instance not running on its primary node %s",
1661 for node, n_img in node_image.items():
1662 if node != node_current:
1663 test = instance in n_img.instances
1664 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1665 "instance should not run on node %s", node)
1667 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1668 """Verify if there are any unknown volumes in the cluster.
1670 The .os, .swap and backup volumes are ignored. All other volumes are
1671 reported as unknown.
1673 @type reserved: L{ganeti.utils.FieldSet}
1674 @param reserved: a FieldSet of reserved volume names
1677 for node, n_img in node_image.items():
1678 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1679 # skip non-healthy nodes
1681 for volume in n_img.volumes:
1682 test = ((node not in node_vol_should or
1683 volume not in node_vol_should[node]) and
1684 not reserved.Matches(volume))
1685 self._ErrorIf(test, self.ENODEORPHANLV, node,
1686 "volume %s is unknown", volume)
1688 def _VerifyOrphanInstances(self, instancelist, node_image):
1689 """Verify the list of running instances.
1691 This checks what instances are running but unknown to the cluster.
1694 for node, n_img in node_image.items():
1695 for o_inst in n_img.instances:
1696 test = o_inst not in instancelist
1697 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1698 "instance %s on node %s should not exist", o_inst, node)
1700 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1701 """Verify N+1 Memory Resilience.
1703 Check that if one single node dies we can still start all the
1704 instances it was primary for.
1707 for node, n_img in node_image.items():
1708 # This code checks that every node which is now listed as
1709 # secondary has enough memory to host all instances it is
1710 # supposed to, should a single other node in the cluster fail.
1711 # FIXME: not ready for failover to an arbitrary node
1712 # FIXME: does not support file-backed instances
1713 # WARNING: we currently take into account down instances as well
1714 # as up ones, considering that even if they're down someone
1715 # might want to start them even in the event of a node failure.
1716 for prinode, instances in n_img.sbp.items():
1718 for instance in instances:
1719 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1720 if bep[constants.BE_AUTO_BALANCE]:
1721 needed_mem += bep[constants.BE_MEMORY]
1722 test = n_img.mfree < needed_mem
1723 self._ErrorIf(test, self.ENODEN1, node,
1724 "not enough memory on to accommodate"
1725 " failovers should peer node %s fail", prinode)
1727 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1729 """Verifies and computes the node required file checksums.
1731 @type ninfo: L{objects.Node}
1732 @param ninfo: the node to check
1733 @param nresult: the remote results for the node
1734 @param file_list: required list of files
1735 @param local_cksum: dictionary of local files and their checksums
1736 @param master_files: list of files that only masters should have
1740 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1742 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1743 test = not isinstance(remote_cksum, dict)
1744 _ErrorIf(test, self.ENODEFILECHECK, node,
1745 "node hasn't returned file checksum data")
1749 for file_name in file_list:
1750 node_is_mc = ninfo.master_candidate
1751 must_have = (file_name not in master_files) or node_is_mc
1753 test1 = file_name not in remote_cksum
1755 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1757 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1758 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1759 "file '%s' missing", file_name)
1760 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1761 "file '%s' has wrong checksum", file_name)
1762 # not candidate and this is not a must-have file
1763 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1764 "file '%s' should not exist on non master"
1765 " candidates (and the file is outdated)", file_name)
1766 # all good, except non-master/non-must have combination
1767 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1768 "file '%s' should not exist"
1769 " on non master candidates", file_name)
1771 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1773 """Verifies and the node DRBD status.
1775 @type ninfo: L{objects.Node}
1776 @param ninfo: the node to check
1777 @param nresult: the remote results for the node
1778 @param instanceinfo: the dict of instances
1779 @param drbd_helper: the configured DRBD usermode helper
1780 @param drbd_map: the DRBD map as returned by
1781 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1785 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1788 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1789 test = (helper_result is None)
1790 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1791 "no drbd usermode helper returned")
1793 status, payload = helper_result
1795 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1796 "drbd usermode helper check unsuccessful: %s", payload)
1797 test = status and (payload != drbd_helper)
1798 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1799 "wrong drbd usermode helper: %s", payload)
1801 # compute the DRBD minors
1803 for minor, instance in drbd_map[node].items():
1804 test = instance not in instanceinfo
1805 _ErrorIf(test, self.ECLUSTERCFG, None,
1806 "ghost instance '%s' in temporary DRBD map", instance)
1807 # ghost instance should not be running, but otherwise we
1808 # don't give double warnings (both ghost instance and
1809 # unallocated minor in use)
1811 node_drbd[minor] = (instance, False)
1813 instance = instanceinfo[instance]
1814 node_drbd[minor] = (instance.name, instance.admin_up)
1816 # and now check them
1817 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1818 test = not isinstance(used_minors, (tuple, list))
1819 _ErrorIf(test, self.ENODEDRBD, node,
1820 "cannot parse drbd status file: %s", str(used_minors))
1822 # we cannot check drbd status
1825 for minor, (iname, must_exist) in node_drbd.items():
1826 test = minor not in used_minors and must_exist
1827 _ErrorIf(test, self.ENODEDRBD, node,
1828 "drbd minor %d of instance %s is not active", minor, iname)
1829 for minor in used_minors:
1830 test = minor not in node_drbd
1831 _ErrorIf(test, self.ENODEDRBD, node,
1832 "unallocated drbd minor %d is in use", minor)
1834 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1835 """Builds the node OS structures.
1837 @type ninfo: L{objects.Node}
1838 @param ninfo: the node to check
1839 @param nresult: the remote results for the node
1840 @param nimg: the node image object
1844 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1846 remote_os = nresult.get(constants.NV_OSLIST, None)
1847 test = (not isinstance(remote_os, list) or
1848 not compat.all(isinstance(v, list) and len(v) == 7
1849 for v in remote_os))
1851 _ErrorIf(test, self.ENODEOS, node,
1852 "node hasn't returned valid OS data")
1861 for (name, os_path, status, diagnose,
1862 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1864 if name not in os_dict:
1867 # parameters is a list of lists instead of list of tuples due to
1868 # JSON lacking a real tuple type, fix it:
1869 parameters = [tuple(v) for v in parameters]
1870 os_dict[name].append((os_path, status, diagnose,
1871 set(variants), set(parameters), set(api_ver)))
1873 nimg.oslist = os_dict
1875 def _VerifyNodeOS(self, ninfo, nimg, base):
1876 """Verifies the node OS list.
1878 @type ninfo: L{objects.Node}
1879 @param ninfo: the node to check
1880 @param nimg: the node image object
1881 @param base: the 'template' node we match against (e.g. from the master)
1885 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1887 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1889 for os_name, os_data in nimg.oslist.items():
1890 assert os_data, "Empty OS status for OS %s?!" % os_name
1891 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1892 _ErrorIf(not f_status, self.ENODEOS, node,
1893 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1894 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1895 "OS '%s' has multiple entries (first one shadows the rest): %s",
1896 os_name, utils.CommaJoin([v[0] for v in os_data]))
1897 # this will be caught in the backend too
1898 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1899 and not f_var, self.ENODEOS, node,
1900 "OS %s with API at least %d does not declare any variant",
1901 os_name, constants.OS_API_V15)
1902 # comparisons with the 'base' image
1903 test = os_name not in base.oslist
1904 _ErrorIf(test, self.ENODEOS, node,
1905 "Extra OS %s not present on reference node (%s)",
1909 assert base.oslist[os_name], "Base node has empty OS status?"
1910 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1912 # base OS is invalid, skipping
1914 for kind, a, b in [("API version", f_api, b_api),
1915 ("variants list", f_var, b_var),
1916 ("parameters", f_param, b_param)]:
1917 _ErrorIf(a != b, self.ENODEOS, node,
1918 "OS %s %s differs from reference node %s: %s vs. %s",
1919 kind, os_name, base.name,
1920 utils.CommaJoin(a), utils.CommaJoin(b))
1922 # check any missing OSes
1923 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1924 _ErrorIf(missing, self.ENODEOS, node,
1925 "OSes present on reference node %s but missing on this node: %s",
1926 base.name, utils.CommaJoin(missing))
1928 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1929 """Verifies and updates the node volume data.
1931 This function will update a L{NodeImage}'s internal structures
1932 with data from the remote call.
1934 @type ninfo: L{objects.Node}
1935 @param ninfo: the node to check
1936 @param nresult: the remote results for the node
1937 @param nimg: the node image object
1938 @param vg_name: the configured VG name
1942 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1944 nimg.lvm_fail = True
1945 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1948 elif isinstance(lvdata, basestring):
1949 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1950 utils.SafeEncode(lvdata))
1951 elif not isinstance(lvdata, dict):
1952 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1954 nimg.volumes = lvdata
1955 nimg.lvm_fail = False
1957 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1958 """Verifies and updates the node instance list.
1960 If the listing was successful, then updates this node's instance
1961 list. Otherwise, it marks the RPC call as failed for the instance
1964 @type ninfo: L{objects.Node}
1965 @param ninfo: the node to check
1966 @param nresult: the remote results for the node
1967 @param nimg: the node image object
1970 idata = nresult.get(constants.NV_INSTANCELIST, None)
1971 test = not isinstance(idata, list)
1972 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1973 " (instancelist): %s", utils.SafeEncode(str(idata)))
1975 nimg.hyp_fail = True
1977 nimg.instances = idata
1979 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1980 """Verifies and computes a node information map
1982 @type ninfo: L{objects.Node}
1983 @param ninfo: the node to check
1984 @param nresult: the remote results for the node
1985 @param nimg: the node image object
1986 @param vg_name: the configured VG name
1990 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1992 # try to read free memory (from the hypervisor)
1993 hv_info = nresult.get(constants.NV_HVINFO, None)
1994 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1995 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1998 nimg.mfree = int(hv_info["memory_free"])
1999 except (ValueError, TypeError):
2000 _ErrorIf(True, self.ENODERPC, node,
2001 "node returned invalid nodeinfo, check hypervisor")
2003 # FIXME: devise a free space model for file based instances as well
2004 if vg_name is not None:
2005 test = (constants.NV_VGLIST not in nresult or
2006 vg_name not in nresult[constants.NV_VGLIST])
2007 _ErrorIf(test, self.ENODELVM, node,
2008 "node didn't return data for the volume group '%s'"
2009 " - it is either missing or broken", vg_name)
2012 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2013 except (ValueError, TypeError):
2014 _ErrorIf(True, self.ENODERPC, node,
2015 "node returned invalid LVM info, check LVM status")
2017 def BuildHooksEnv(self):
2020 Cluster-Verify hooks are run only in the post phase; if they fail, their
2021 output is logged in the verify output and the verification fails.
2024 all_nodes = self.cfg.GetNodeList()
2026 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2028 for node in self.cfg.GetAllNodesInfo().values():
2029 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2031 return env, [], all_nodes
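# Added note (not in the original source): the three-element return value is
# the standard BuildHooksEnv contract of (environment dict, nodes for the
# pre phase, nodes for the post phase); the empty middle list is what makes
# cluster-verify hooks run only in the post phase, on all nodes. The
# environment built above carries CLUSTER_TAGS plus one NODE_TAGS_<name>
# entry per node.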
2033 def Exec(self, feedback_fn):
2034 """Verify integrity of cluster, performing various test on nodes.
2038 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2039 verbose = self.op.verbose
2040 self._feedback_fn = feedback_fn
2041 feedback_fn("* Verifying global settings")
2042 for msg in self.cfg.VerifyConfig():
2043 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2045 # Check the cluster certificates
2046 for cert_filename in constants.ALL_CERT_FILES:
2047 (errcode, msg) = _VerifyCertificate(cert_filename)
2048 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2050 vg_name = self.cfg.GetVGName()
2051 drbd_helper = self.cfg.GetDRBDHelper()
2052 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2053 cluster = self.cfg.GetClusterInfo()
2054 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2055 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2056 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2057 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2058 for iname in instancelist)
2059 i_non_redundant = [] # Non redundant instances
2060 i_non_a_balanced = [] # Non auto-balanced instances
2061 n_offline = 0 # Count of offline nodes
2062 n_drained = 0 # Count of nodes being drained
2063 node_vol_should = {}
2065 # FIXME: verify OS list
2066 # do local checksums
2067 master_files = [constants.CLUSTER_CONF_FILE]
2068 master_node = self.master_node = self.cfg.GetMasterNode()
2069 master_ip = self.cfg.GetMasterIP()
2071 file_names = ssconf.SimpleStore().GetFileList()
2072 file_names.extend(constants.ALL_CERT_FILES)
2073 file_names.extend(master_files)
2074 if cluster.modify_etc_hosts:
2075 file_names.append(constants.ETC_HOSTS)
2077 local_checksums = utils.FingerprintFiles(file_names)
2079 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2080 node_verify_param = {
2081 constants.NV_FILELIST: file_names,
2082 constants.NV_NODELIST: [node.name for node in nodeinfo
2083 if not node.offline],
2084 constants.NV_HYPERVISOR: hypervisors,
2085 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2086 node.secondary_ip) for node in nodeinfo
2087 if not node.offline],
2088 constants.NV_INSTANCELIST: hypervisors,
2089 constants.NV_VERSION: None,
2090 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2091 constants.NV_NODESETUP: None,
2092 constants.NV_TIME: None,
2093 constants.NV_MASTERIP: (master_node, master_ip),
2094 constants.NV_OSLIST: None,
2097 if vg_name is not None:
2098 node_verify_param[constants.NV_VGLIST] = None
2099 node_verify_param[constants.NV_LVLIST] = vg_name
2100 node_verify_param[constants.NV_PVLIST] = [vg_name]
2101 node_verify_param[constants.NV_DRBDLIST] = None
2104 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
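# Added note: node_verify_param is the per-node "work order" shipped to every
# node daemon through call_node_verify below; each constants.NV_* key selects
# one check and its value is that check's input (e.g. NV_LVLIST carries the
# volume group whose LVs should be listed). The results come back under the
# same keys and are consumed by the _VerifyNode*/_UpdateNode* helpers above.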
2106 # Build our expected cluster state
2107 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2109 for node in nodeinfo)
2111 for instance in instancelist:
2112 inst_config = instanceinfo[instance]
2114 for nname in inst_config.all_nodes:
2115 if nname not in node_image:
2117 gnode = self.NodeImage(name=nname)
2119 node_image[nname] = gnode
2121 inst_config.MapLVsByNode(node_vol_should)
2123 pnode = inst_config.primary_node
2124 node_image[pnode].pinst.append(instance)
2126 for snode in inst_config.secondary_nodes:
2127 nimg = node_image[snode]
2128 nimg.sinst.append(instance)
2129 if pnode not in nimg.sbp:
2130 nimg.sbp[pnode] = []
2131 nimg.sbp[pnode].append(instance)
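# Illustrative sketch (added commentary; names are made up): after this loop
# a node image describes the expected instance placement, e.g.
#   nimg.pinst == ["inst1"]              # instances using it as primary
#   nimg.sinst == ["inst2"]              # instances using it as secondary
#   nimg.sbp   == {"node1": ["inst2"]}   # secondary instances, by primary
# This expected state is later compared against the live data gathered via
# RPC.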
2133 # At this point, we have the in-memory data structures complete,
2134 # except for the runtime information, which we'll gather next
2136 # Due to the way our RPC system works, exact response times cannot be
2137 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2138 # time before and after executing the request, we can at least have a time window.
2140 nvinfo_starttime = time.time()
2141 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2142 self.cfg.GetClusterName())
2143 nvinfo_endtime = time.time()
2145 all_drbd_map = self.cfg.ComputeDRBDMap()
2147 feedback_fn("* Verifying node status")
2151 for node_i in nodeinfo:
2153 nimg = node_image[node]
2157 feedback_fn("* Skipping offline node %s" % (node,))
2161 if node == master_node:
2163 elif node_i.master_candidate:
2164 ntype = "master candidate"
2165 elif node_i.drained:
2171 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2173 msg = all_nvinfo[node].fail_msg
2174 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2176 nimg.rpc_fail = True
2179 nresult = all_nvinfo[node].payload
2181 nimg.call_ok = self._VerifyNode(node_i, nresult)
2182 self._VerifyNodeNetwork(node_i, nresult)
2183 self._VerifyNodeLVM(node_i, nresult, vg_name)
2184 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2186 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2188 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2190 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2191 self._UpdateNodeInstances(node_i, nresult, nimg)
2192 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2193 self._UpdateNodeOS(node_i, nresult, nimg)
2194 if not nimg.os_fail:
2195 if refos_img is None:
2197 self._VerifyNodeOS(node_i, nimg, refos_img)
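# Added note: the first node whose OS RPC data came back cleanly becomes the
# reference image (refos_img); all later nodes are compared against it in
# _VerifyNodeOS, so OS discrepancies are reported relative to that node
# rather than to the master node specifically.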
2199 feedback_fn("* Verifying instance status")
2200 for instance in instancelist:
2202 feedback_fn("* Verifying instance %s" % instance)
2203 inst_config = instanceinfo[instance]
2204 self._VerifyInstance(instance, inst_config, node_image)
2205 inst_nodes_offline = []
2207 pnode = inst_config.primary_node
2208 pnode_img = node_image[pnode]
2209 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2210 self.ENODERPC, pnode, "instance %s, connection to"
2211 " primary node failed", instance)
2213 if pnode_img.offline:
2214 inst_nodes_offline.append(pnode)
2216 # If the instance is non-redundant we cannot survive losing its primary
2217 # node, so we are not N+1 compliant. On the other hand we have no disk
2218 # templates with more than one secondary, so that situation is not well supported either.
2220 # FIXME: does not support file-backed instances
2221 if not inst_config.secondary_nodes:
2222 i_non_redundant.append(instance)
2223 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2224 instance, "instance has multiple secondary nodes: %s",
2225 utils.CommaJoin(inst_config.secondary_nodes),
2226 code=self.ETYPE_WARNING)
2228 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2229 i_non_a_balanced.append(instance)
2231 for snode in inst_config.secondary_nodes:
2232 s_img = node_image[snode]
2233 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2234 "instance %s, connection to secondary node failed", instance)
2237 inst_nodes_offline.append(snode)
2239 # warn that the instance lives on offline nodes
2240 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2241 "instance lives on offline node(s) %s",
2242 utils.CommaJoin(inst_nodes_offline))
2243 # ... or ghost nodes
2244 for node in inst_config.all_nodes:
2245 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2246 "instance lives on ghost node %s", node)
2248 feedback_fn("* Verifying orphan volumes")
2249 reserved = utils.FieldSet(*cluster.reserved_lvs)
2250 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2252 feedback_fn("* Verifying orphan instances")
2253 self._VerifyOrphanInstances(instancelist, node_image)
2255 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2256 feedback_fn("* Verifying N+1 Memory redundancy")
2257 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2259 feedback_fn("* Other Notes")
2261 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2262 % len(i_non_redundant))
2264 if i_non_a_balanced:
2265 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2266 % len(i_non_a_balanced))
2269 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2272 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2276 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2277 """Analyze the post-hooks' result
2279 This method analyses the hook result, handles it, and sends some
2280 nicely-formatted feedback back to the user.
2282 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2283 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2284 @param hooks_results: the results of the multi-node hooks rpc call
2285 @param feedback_fn: function used to send feedback back to the caller
2286 @param lu_result: previous Exec result
2287 @return: the new Exec result, based on the previous result
2291 # We only really run POST phase hooks, and are only interested in their results.
2293 if phase == constants.HOOKS_PHASE_POST:
2294 # Used to change hooks' output to proper indentation
2295 indent_re = re.compile('^', re.M)
2296 feedback_fn("* Hooks Results")
2297 assert hooks_results, "invalid result from hooks"
2299 for node_name in hooks_results:
2300 res = hooks_results[node_name]
2302 test = msg and not res.offline
2303 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2304 "Communication failure in hooks execution: %s", msg)
2305 if res.offline or msg:
2306 # No need to investigate payload if node is offline or gave an error.
2307 # manually override lu_result here, as _ErrorIf only
2308 # overrides self.bad
2311 for script, hkr, output in res.payload:
2312 test = hkr == constants.HKR_FAIL
2313 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2314 "Script %s failed, output:", script)
2316 output = indent_re.sub(' ', output)
2317 feedback_fn("%s" % output)
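# Added note: indent_re is compiled with re.M, so it matches the start of
# every line and the sub() above prefixes each line of the hook's output
# with the chosen indentation before it is passed to feedback_fn, keeping
# multi-line script output visually nested under the node name.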
2323 class LUVerifyDisks(NoHooksLU):
2324 """Verifies the cluster disks status.
2329 def ExpandNames(self):
2330 self.needed_locks = {
2331 locking.LEVEL_NODE: locking.ALL_SET,
2332 locking.LEVEL_INSTANCE: locking.ALL_SET,
2334 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2336 def Exec(self, feedback_fn):
2337 """Verify integrity of cluster disks.
2339 @rtype: tuple of three items
2340 @return: a tuple of (dict of node-to-node_error, list of instances
2341 which need activate-disks, dict of instance: (node, volume) for
2345 result = res_nodes, res_instances, res_missing = {}, [], {}
2347 vg_name = self.cfg.GetVGName()
2348 nodes = utils.NiceSort(self.cfg.GetNodeList())
2349 instances = [self.cfg.GetInstanceInfo(name)
2350 for name in self.cfg.GetInstanceList()]
2353 for inst in instances:
2355 if (not inst.admin_up or
2356 inst.disk_template not in constants.DTS_NET_MIRROR):
2358 inst.MapLVsByNode(inst_lvs)
2359 # transform {iname: {node: [vol, ...], ...}, ...} into {(node, vol): iname}
2360 for node, vol_list in inst_lvs.iteritems():
2361 for vol in vol_list:
2362 nv_dict[(node, vol)] = inst
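# Illustrative sketch (added commentary; LV names are made up): for a DRBD
# instance mirrored on node1/node2, MapLVsByNode() yields something like
#   {"node1": ["xenvg/aaaa.disk0_data", "xenvg/aaaa.disk0_meta"],
#    "node2": ["xenvg/aaaa.disk0_data", "xenvg/aaaa.disk0_meta"]}
# and the loop above flattens this into nv_dict entries keyed by
# (node, volume), so each LV reported by a node can be matched back to its
# owning instance.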
2367 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2371 node_res = node_lvs[node]
2372 if node_res.offline:
2374 msg = node_res.fail_msg
2376 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2377 res_nodes[node] = msg
2380 lvs = node_res.payload
2381 for lv_name, (_, _, lv_online) in lvs.items():
2382 inst = nv_dict.pop((node, lv_name), None)
2383 if (not lv_online and inst is not None
2384 and inst.name not in res_instances):
2385 res_instances.append(inst.name)
2387 # any leftover items in nv_dict are missing LVs; record them per instance name
2389 for key, inst in nv_dict.iteritems():
2390 if inst.name not in res_missing:
2391 res_missing[inst.name] = []
2392 res_missing[inst.name].append(key)
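# Illustrative sketch of the final result (added commentary; names made up),
# matching the docstring of Exec above:
#   res_nodes     == {"node3": "Connection failed"}       # node -> error
#   res_instances == ["inst2"]                            # need activate-disks
#   res_missing   == {"inst5": [("node1", "xenvg/lv1")]}  # missing volumes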
2397 class LURepairDiskSizes(NoHooksLU):
2398 """Verifies the cluster disks sizes.
2401 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2404 def ExpandNames(self):
2405 if self.op.instances:
2406 self.wanted_names = []
2407 for name in self.op.instances:
2408 full_name = _ExpandInstanceName(self.cfg, name)
2409 self.wanted_names.append(full_name)
2410 self.needed_locks = {
2411 locking.LEVEL_NODE: [],
2412 locking.LEVEL_INSTANCE: self.wanted_names,
2414 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2416 self.wanted_names = None
2417 self.needed_locks = {
2418 locking.LEVEL_NODE: locking.ALL_SET,
2419 locking.LEVEL_INSTANCE: locking.ALL_SET,
2421 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2423 def DeclareLocks(self, level):
2424 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2425 self._LockInstancesNodes(primary_only=True)
2427 def CheckPrereq(self):
2428 """Check prerequisites.
2430 This only checks the optional instance list against the existing names.
2433 if self.wanted_names is None:
2434 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2436 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2437 in self.wanted_names]
2439 def _EnsureChildSizes(self, disk):
2440 """Ensure children of the disk have the needed disk size.
2442 This is valid mainly for DRBD8 and fixes an issue where the
2443 children have a smaller disk size than the parent.
2445 @param disk: an L{ganeti.objects.Disk} object
2448 if disk.dev_type == constants.LD_DRBD8:
2449 assert disk.children, "Empty children for DRBD8?"
2450 fchild = disk.children[0]
2451 mismatch = fchild.size < disk.size
2453 self.LogInfo("Child disk has size %d, parent %d, fixing",
2454 fchild.size, disk.size)
2455 fchild.size = disk.size
2457 # and we recurse on this child only, not on the metadev
2458 return self._EnsureChildSizes(fchild) or mismatch
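# Added note: the return value is True as soon as any disk in the DRBD8
# stack (this level or the recursively checked data child) had to be grown,
# which is how the caller in Exec() knows it must write the updated Disk
# objects back to the configuration.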
2462 def Exec(self, feedback_fn):
2463 """Verify the size of cluster disks.
2466 # TODO: check child disks too
2467 # TODO: check differences in size between primary/secondary nodes
2469 for instance in self.wanted_instances:
2470 pnode = instance.primary_node
2471 if pnode not in per_node_disks:
2472 per_node_disks[pnode] = []
2473 for idx, disk in enumerate(instance.disks):
2474 per_node_disks[pnode].append((instance, idx, disk))
2477 for node, dskl in per_node_disks.items():
2478 newl = [v[2].Copy() for v in dskl]
2480 self.cfg.SetDiskID(dsk, node)
2481 result = self.rpc.call_blockdev_getsizes(node, newl)
2483 self.LogWarning("Failure in blockdev_getsizes call to node"
2484 " %s, ignoring", node)
2486 if len(result.data) != len(dskl):
2487 self.LogWarning("Invalid result from node %s, ignoring node results",
2490 for ((instance, idx, disk), size) in zip(dskl, result.data):
2492 self.LogWarning("Disk %d of instance %s did not return size"
2493 " information, ignoring", idx, instance.name)
2495 if not isinstance(size, (int, long)):
2496 self.LogWarning("Disk %d of instance %s did not return valid"
2497 " size information, ignoring", idx, instance.name)
2500 if size != disk.size:
2501 self.LogInfo("Disk %d of instance %s has mismatched size,"
2502 " correcting: recorded %d, actual %d", idx,
2503 instance.name, disk.size, size)
2505 self.cfg.Update(instance, feedback_fn)
2506 changed.append((instance.name, idx, size))
2507 if self._EnsureChildSizes(disk):
2508 self.cfg.Update(instance, feedback_fn)
2509 changed.append((instance.name, idx, disk.size))
2513 class LURenameCluster(LogicalUnit):
2514 """Rename the cluster.
2517 HPATH = "cluster-rename"
2518 HTYPE = constants.HTYPE_CLUSTER
2519 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2521 def BuildHooksEnv(self):
2526 "OP_TARGET": self.cfg.GetClusterName(),
2527 "NEW_NAME": self.op.name,
2529 mn = self.cfg.GetMasterNode()
2530 all_nodes = self.cfg.GetNodeList()
2531 return env, [mn], all_nodes
2533 def CheckPrereq(self):
2534 """Verify that the passed name is a valid one.
2537 hostname = netutils.GetHostname(name=self.op.name,
2538 family=self.cfg.GetPrimaryIPFamily())
2540 new_name = hostname.name
2541 self.ip = new_ip = hostname.ip
2542 old_name = self.cfg.GetClusterName()
2543 old_ip = self.cfg.GetMasterIP()
2544 if new_name == old_name and new_ip == old_ip:
2545 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2546 " cluster has changed",
2548 if new_ip != old_ip:
2549 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2550 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2551 " reachable on the network" %
2552 new_ip, errors.ECODE_NOTUNIQUE)
2554 self.op.name = new_name
2556 def Exec(self, feedback_fn):
2557 """Rename the cluster.
2560 clustername = self.op.name
2563 # shutdown the master IP
2564 master = self.cfg.GetMasterNode()
2565 result = self.rpc.call_node_stop_master(master, False)
2566 result.Raise("Could not disable the master role")
2569 cluster = self.cfg.GetClusterInfo()
2570 cluster.cluster_name = clustername
2571 cluster.master_ip = ip
2572 self.cfg.Update(cluster, feedback_fn)
2574 # update the known hosts file
2575 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2576 node_list = self.cfg.GetNodeList()
2578 node_list.remove(master)
2581 result = self.rpc.call_upload_file(node_list,
2582 constants.SSH_KNOWN_HOSTS_FILE)
2583 for to_node, to_result in result.iteritems():
2584 msg = to_result.fail_msg
2586 msg = ("Copy of file %s to node %s failed: %s" %
2587 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2588 self.proc.LogWarning(msg)
2591 result = self.rpc.call_node_start_master(master, False, False)
2592 msg = result.fail_msg
2594 self.LogWarning("Could not re-enable the master role on"
2595 " the master, please restart manually: %s", msg)
2600 class LUSetClusterParams(LogicalUnit):
2601 """Change the parameters of the cluster.
2604 HPATH = "cluster-modify"
2605 HTYPE = constants.HTYPE_CLUSTER
2607 ("vg_name", None, _TMaybeString),
2608 ("enabled_hypervisors", None,
2609 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2610 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2611 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2612 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2613 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2614 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2615 ("uid_pool", None, _NoType),
2616 ("add_uids", None, _NoType),
2617 ("remove_uids", None, _NoType),
2618 ("maintain_node_health", None, _TMaybeBool),
2619 ("nicparams", None, _TOr(_TDict, _TNone)),
2620 ("drbd_helper", None, _TOr(_TString, _TNone)),
2621 ("default_iallocator", None, _TMaybeString),
2622 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2623 ("hidden_os", None, _TOr(_TListOf(\
2626 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2628 ("blacklisted_os", None, _TOr(_TListOf(\
2631 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
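# Illustrative example (added commentary; the OS name is made up): a valid
# value for "hidden_os" or "blacklisted_os" is a list of (action, os_name)
# pairs whose action is one of constants.DDMS_VALUES, e.g.
#   [(constants.DDM_ADD, "lenny-image"), (constants.DDM_REMOVE, "etch")]
# The helper_os() closure in Exec() applies these modifications in order.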
2636 def CheckArguments(self):
2640 if self.op.uid_pool:
2641 uidpool.CheckUidPool(self.op.uid_pool)
2643 if self.op.add_uids:
2644 uidpool.CheckUidPool(self.op.add_uids)
2646 if self.op.remove_uids:
2647 uidpool.CheckUidPool(self.op.remove_uids)
2649 def ExpandNames(self):
2650 # FIXME: in the future maybe other cluster params won't require checking on
2651 # all nodes to be modified.
2652 self.needed_locks = {
2653 locking.LEVEL_NODE: locking.ALL_SET,
2655 self.share_locks[locking.LEVEL_NODE] = 1
2657 def BuildHooksEnv(self):
2662 "OP_TARGET": self.cfg.GetClusterName(),
2663 "NEW_VG_NAME": self.op.vg_name,
2665 mn = self.cfg.GetMasterNode()
2666 return env, [mn], [mn]
2668 def CheckPrereq(self):
2669 """Check prerequisites.
2671 This checks whether the given params don't conflict and
2672 if the given volume group is valid.
2675 if self.op.vg_name is not None and not self.op.vg_name:
2676 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2677 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2678 " instances exist", errors.ECODE_INVAL)
2680 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2681 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2682 raise errors.OpPrereqError("Cannot disable drbd helper while"
2683 " drbd-based instances exist",
2686 node_list = self.acquired_locks[locking.LEVEL_NODE]
2688 # if vg_name not None, checks given volume group on all nodes
2690 vglist = self.rpc.call_vg_list(node_list)
2691 for node in node_list:
2692 msg = vglist[node].fail_msg
2694 # ignoring down node
2695 self.LogWarning("Error while gathering data on node %s"
2696 " (ignoring node): %s", node, msg)
2698 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2700 constants.MIN_VG_SIZE)
2702 raise errors.OpPrereqError("Error on node '%s': %s" %
2703 (node, vgstatus), errors.ECODE_ENVIRON)
2705 if self.op.drbd_helper:
2706 # checks given drbd helper on all nodes
2707 helpers = self.rpc.call_drbd_helper(node_list)
2708 for node in node_list:
2709 ninfo = self.cfg.GetNodeInfo(node)
2711 self.LogInfo("Not checking drbd helper on offline node %s", node)
2713 msg = helpers[node].fail_msg
2715 raise errors.OpPrereqError("Error checking drbd helper on node"
2716 " '%s': %s" % (node, msg),
2717 errors.ECODE_ENVIRON)
2718 node_helper = helpers[node].payload
2719 if node_helper != self.op.drbd_helper:
2720 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2721 (node, node_helper), errors.ECODE_ENVIRON)
2723 self.cluster = cluster = self.cfg.GetClusterInfo()
2724 # validate params changes
2725 if self.op.beparams:
2726 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2727 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2729 if self.op.nicparams:
2730 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2731 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2732 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2735 # check all instances for consistency
2736 for instance in self.cfg.GetAllInstancesInfo().values():
2737 for nic_idx, nic in enumerate(instance.nics):
2738 params_copy = copy.deepcopy(nic.nicparams)
2739 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2741 # check parameter syntax
2743 objects.NIC.CheckParameterSyntax(params_filled)
2744 except errors.ConfigurationError, err:
2745 nic_errors.append("Instance %s, nic/%d: %s" %
2746 (instance.name, nic_idx, err))
2748 # if we're moving instances to routed, check that they have an ip
2749 target_mode = params_filled[constants.NIC_MODE]
2750 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2751 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2752 (instance.name, nic_idx))
2754 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2755 "\n".join(nic_errors))
2757 # hypervisor list/parameters
2758 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2759 if self.op.hvparams:
2760 for hv_name, hv_dict in self.op.hvparams.items():
2761 if hv_name not in self.new_hvparams:
2762 self.new_hvparams[hv_name] = hv_dict
2764 self.new_hvparams[hv_name].update(hv_dict)
2766 # os hypervisor parameters
2767 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2769 for os_name, hvs in self.op.os_hvp.items():
2770 if os_name not in self.new_os_hvp:
2771 self.new_os_hvp[os_name] = hvs
2773 for hv_name, hv_dict in hvs.items():
2774 if hv_name not in self.new_os_hvp[os_name]:
2775 self.new_os_hvp[os_name][hv_name] = hv_dict
2777 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2780 self.new_osp = objects.FillDict(cluster.osparams, {})
2781 if self.op.osparams:
2782 for os_name, osp in self.op.osparams.items():
2783 if os_name not in self.new_osp:
2784 self.new_osp[os_name] = {}
2786 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2789 if not self.new_osp[os_name]:
2790 # we removed all parameters
2791 del self.new_osp[os_name]
2793 # check the parameter validity (remote check)
2794 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2795 os_name, self.new_osp[os_name])
2797 # changes to the hypervisor list
2798 if self.op.enabled_hypervisors is not None:
2799 self.hv_list = self.op.enabled_hypervisors
2800 for hv in self.hv_list:
2801 # if the hypervisor doesn't already exist in the cluster
2802 # hvparams, we initialize it to empty, and then (in both
2803 # cases) we make sure to fill the defaults, as we might not
2804 # have a complete defaults list if the hypervisor wasn't
2806 if hv not in new_hvp:
2808 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2809 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2811 self.hv_list = cluster.enabled_hypervisors
2813 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2814 # either the enabled list has changed, or the parameters have, validate
2815 for hv_name, hv_params in self.new_hvparams.items():
2816 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2817 (self.op.enabled_hypervisors and
2818 hv_name in self.op.enabled_hypervisors)):
2819 # either this is a new hypervisor, or its parameters have changed
2820 hv_class = hypervisor.GetHypervisor(hv_name)
2821 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2822 hv_class.CheckParameterSyntax(hv_params)
2823 _CheckHVParams(self, node_list, hv_name, hv_params)
2826 # no need to check any newly-enabled hypervisors, since the
2827 # defaults have already been checked in the above code-block
2828 for os_name, os_hvp in self.new_os_hvp.items():
2829 for hv_name, hv_params in os_hvp.items():
2830 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2831 # we need to fill in the new os_hvp on top of the actual hv_p
2832 cluster_defaults = self.new_hvparams.get(hv_name, {})
2833 new_osp = objects.FillDict(cluster_defaults, hv_params)
2834 hv_class = hypervisor.GetHypervisor(hv_name)
2835 hv_class.CheckParameterSyntax(new_osp)
2836 _CheckHVParams(self, node_list, hv_name, new_osp)
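# Added note: each os_hvp override is validated after being layered on top of
# the (possibly also updated) cluster-wide hypervisor parameters, mirroring
# the order in which the two levels are combined when instances are created,
# so only the effective merged dictionaries are syntax- and node-checked.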
2838 if self.op.default_iallocator:
2839 alloc_script = utils.FindFile(self.op.default_iallocator,
2840 constants.IALLOCATOR_SEARCH_PATH,
2842 if alloc_script is None:
2843 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2844 " specified" % self.op.default_iallocator,
2847 def Exec(self, feedback_fn):
2848 """Change the parameters of the cluster.
2851 if self.op.vg_name is not None:
2852 new_volume = self.op.vg_name
2855 if new_volume != self.cfg.GetVGName():
2856 self.cfg.SetVGName(new_volume)
2858 feedback_fn("Cluster LVM configuration already in desired"
2859 " state, not changing")
2860 if self.op.drbd_helper is not None:
2861 new_helper = self.op.drbd_helper
2864 if new_helper != self.cfg.GetDRBDHelper():
2865 self.cfg.SetDRBDHelper(new_helper)
2867 feedback_fn("Cluster DRBD helper already in desired state,"
2869 if self.op.hvparams:
2870 self.cluster.hvparams = self.new_hvparams
2872 self.cluster.os_hvp = self.new_os_hvp
2873 if self.op.enabled_hypervisors is not None:
2874 self.cluster.hvparams = self.new_hvparams
2875 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2876 if self.op.beparams:
2877 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2878 if self.op.nicparams:
2879 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2880 if self.op.osparams:
2881 self.cluster.osparams = self.new_osp
2883 if self.op.candidate_pool_size is not None:
2884 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2885 # we need to update the pool size here, otherwise the save will fail
2886 _AdjustCandidatePool(self, [])
2888 if self.op.maintain_node_health is not None:
2889 self.cluster.maintain_node_health = self.op.maintain_node_health
2891 if self.op.add_uids is not None:
2892 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2894 if self.op.remove_uids is not None:
2895 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2897 if self.op.uid_pool is not None:
2898 self.cluster.uid_pool = self.op.uid_pool
2900 if self.op.default_iallocator is not None:
2901 self.cluster.default_iallocator = self.op.default_iallocator
2903 if self.op.reserved_lvs is not None:
2904 self.cluster.reserved_lvs = self.op.reserved_lvs
2906 def helper_os(aname, mods, desc):
2908 lst = getattr(self.cluster, aname)
2909 for key, val in mods:
2910 if key == constants.DDM_ADD:
2912 feedback_fn("OS %s already in %s, ignoring", val, desc)
2915 elif key == constants.DDM_REMOVE:
2919 feedback_fn("OS %s not found in %s, ignoring", val, desc)
2921 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2923 if self.op.hidden_os:
2924 helper_os("hidden_os", self.op.hidden_os, "hidden")
2926 if self.op.blacklisted_os:
2927 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2929 self.cfg.Update(self.cluster, feedback_fn)
2932 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2933 """Distribute additional files which are part of the cluster configuration.
2935 ConfigWriter takes care of distributing the config and ssconf files, but
2936 there are more files which should be distributed to all nodes. This function
2937 makes sure those are copied.
2939 @param lu: calling logical unit
2940 @param additional_nodes: list of nodes not in the config to distribute to
2943 # 1. Gather target nodes
2944 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2945 dist_nodes = lu.cfg.GetOnlineNodeList()
2946 if additional_nodes is not None:
2947 dist_nodes.extend(additional_nodes)
2948 if myself.name in dist_nodes:
2949 dist_nodes.remove(myself.name)
2951 # 2. Gather files to distribute
2952 dist_files = set([constants.ETC_HOSTS,
2953 constants.SSH_KNOWN_HOSTS_FILE,
2954 constants.RAPI_CERT_FILE,
2955 constants.RAPI_USERS_FILE,
2956 constants.CONFD_HMAC_KEY,
2957 constants.CLUSTER_DOMAIN_SECRET_FILE,
2960 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2961 for hv_name in enabled_hypervisors:
2962 hv_class = hypervisor.GetHypervisor(hv_name)
2963 dist_files.update(hv_class.GetAncillaryFiles())
2965 # 3. Perform the files upload
2966 for fname in dist_files:
2967 if os.path.exists(fname):
2968 result = lu.rpc.call_upload_file(dist_nodes, fname)
2969 for to_node, to_result in result.items():
2970 msg = to_result.fail_msg
2972 msg = ("Copy of file %s to node %s failed: %s" %
2973 (fname, to_node, msg))
2974 lu.proc.LogWarning(msg)
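# Usage note (added commentary): this helper is invoked both after
# cluster-wide changes (e.g. LURedistributeConfig below) and when nodes join
# or leave the cluster; a node being added is passed via additional_nodes
# because it is not yet part of the configuration and would otherwise be
# skipped by GetOnlineNodeList().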
2977 class LURedistributeConfig(NoHooksLU):
2978 """Force the redistribution of cluster configuration.
2980 This is a very simple LU.
2985 def ExpandNames(self):
2986 self.needed_locks = {
2987 locking.LEVEL_NODE: locking.ALL_SET,
2989 self.share_locks[locking.LEVEL_NODE] = 1
2991 def Exec(self, feedback_fn):
2992 """Redistribute the configuration.
2995 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2996 _RedistributeAncillaryFiles(self)
2999 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3000 """Sleep and poll for an instance's disk to sync.
3003 if not instance.disks or disks is not None and not disks:
3006 disks = _ExpandCheckDisks(instance, disks)
3009 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3011 node = instance.primary_node
3014 lu.cfg.SetDiskID(dev, node)
3016 # TODO: Convert to utils.Retry
3019 degr_retries = 10 # in seconds, as we sleep 1 second each time
3023 cumul_degraded = False
3024 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3025 msg = rstats.fail_msg
3027 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3030 raise errors.RemoteError("Can't contact node %s for mirror data,"
3031 " aborting." % node)
3034 rstats = rstats.payload
3036 for i, mstat in enumerate(rstats):
3038 lu.LogWarning("Can't compute data for node %s/%s",
3039 node, disks[i].iv_name)
3042 cumul_degraded = (cumul_degraded or
3043 (mstat.is_degraded and mstat.sync_percent is None))
3044 if mstat.sync_percent is not None:
3046 if mstat.estimated_time is not None:
3047 rem_time = ("%s remaining (estimated)" %
3048 utils.FormatSeconds(mstat.estimated_time))
3049 max_time = mstat.estimated_time
3051 rem_time = "no time estimate"
3052 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3053 (disks[i].iv_name, mstat.sync_percent, rem_time))
3055 # if we're done but degraded, let's do a few small retries, to
3056 # make sure we see a stable and not transient situation; therefore
3057 # we force restart of the loop
3058 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3059 logging.info("Degraded disks found, %d retries left", degr_retries)
3067 time.sleep(min(60, max_time))
3070 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3071 return not cumul_degraded
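# Added note: the sleep between polls is bounded by min(60, max_time), i.e.
# the worst estimated completion time reported by any device, and a few
# extra passes (degr_retries) are made when the disks report "done but
# degraded", so a transient state is not mistaken for a permanent failure.
# The result is True only if no device was still degraded at the last poll.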
3074 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3075 """Check that mirrors are not degraded.
3077 The ldisk parameter, if True, will change the test from the
3078 is_degraded attribute (which represents overall non-ok status for
3079 the device(s)) to the ldisk (representing the local storage status).
3082 lu.cfg.SetDiskID(dev, node)
3086 if on_primary or dev.AssembleOnSecondary():
3087 rstats = lu.rpc.call_blockdev_find(node, dev)
3088 msg = rstats.fail_msg
3090 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3092 elif not rstats.payload:
3093 lu.LogWarning("Can't find disk on node %s", node)
3097 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3099 result = result and not rstats.payload.is_degraded
3102 for child in dev.children:
3103 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
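# Added note: with ldisk=True the check looks only at the local storage
# status (payload.ldisk_status == constants.LDS_OKAY), otherwise it uses the
# overall is_degraded flag; child devices (the underlying LVs) are always
# checked recursively with the default test.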
3108 class LUDiagnoseOS(NoHooksLU):
3109 """Logical unit for OS diagnose/query.
3114 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3118 _BLK = "blacklisted"
3120 _FIELDS_STATIC = utils.FieldSet()
3121 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3122 "parameters", "api_versions", _HID, _BLK)
3124 def CheckArguments(self):
3126 raise errors.OpPrereqError("Selective OS query not supported",
3129 _CheckOutputFields(static=self._FIELDS_STATIC,
3130 dynamic=self._FIELDS_DYNAMIC,
3131 selected=self.op.output_fields)
3133 def ExpandNames(self):
3134 # Lock all nodes, in shared mode
3135 # Temporary removal of locks, should be reverted later
3136 # TODO: reintroduce locks when they are lighter-weight
3137 self.needed_locks = {}
3138 #self.share_locks[locking.LEVEL_NODE] = 1
3139 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3142 def _DiagnoseByOS(rlist):
3143 """Remaps a per-node return list into an a per-os per-node dictionary
3145 @param rlist: a map with node names as keys and OS objects as values
3148 @return: a dictionary with OS names as keys and, as values, another
3149 map, with nodes as keys and tuples of (path, status, diagnose,
3150 variants, parameters, api_versions) as values, eg::
3152 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3153 (/srv/..., False, "invalid api")],
3154 "node2": [(/srv/..., True, "", [], [])]}
3159 # we build here the list of nodes that didn't fail the RPC (at RPC
3160 # level), so that nodes with a non-responding node daemon don't
3161 # make all OSes invalid
3162 good_nodes = [node_name for node_name in rlist
3163 if not rlist[node_name].fail_msg]
3164 for node_name, nr in rlist.items():
3165 if nr.fail_msg or not nr.payload:
3167 for (name, path, status, diagnose, variants,
3168 params, api_versions) in nr.payload:
3169 if name not in all_os:
3170 # build a list of nodes for this os containing empty lists
3171 # for each node in node_list
3173 for nname in good_nodes:
3174 all_os[name][nname] = []
3175 # convert params from [name, help] to (name, help)
3176 params = [tuple(v) for v in params]
3177 all_os[name][node_name].append((path, status, diagnose,
3178 variants, params, api_versions))
3181 def Exec(self, feedback_fn):
3182 """Compute the list of OSes.
3185 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3186 node_data = self.rpc.call_os_diagnose(valid_nodes)
3187 pol = self._DiagnoseByOS(node_data)
3189 cluster = self.cfg.GetClusterInfo()
3191 for os_name in utils.NiceSort(pol.keys()):
3192 os_data = pol[os_name]
3195 (variants, params, api_versions) = null_state = (set(), set(), set())
3196 for idx, osl in enumerate(os_data.values()):
3197 valid = bool(valid and osl and osl[0][1])
3199 (variants, params, api_versions) = null_state
3201 node_variants, node_params, node_api = osl[0][3:6]
3202 if idx == 0: # first entry
3203 variants = set(node_variants)
3204 params = set(node_params)
3205 api_versions = set(node_api)
3206 else: # keep consistency
3207 variants.intersection_update(node_variants)
3208 params.intersection_update(node_params)
3209 api_versions.intersection_update(node_api)
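# Added note: variants, parameters and API versions are reported per node
# and intersected above, so the values exposed for an OS are only those
# supported by every node carrying it; a single node with a stripped-down
# copy of the OS narrows the cluster-wide view accordingly.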
3211 is_hid = os_name in cluster.hidden_os
3212 is_blk = os_name in cluster.blacklisted_os
3213 if ((self._HID not in self.op.output_fields and is_hid) or
3214 (self._BLK not in self.op.output_fields and is_blk) or
3215 (self._VLD not in self.op.output_fields and not valid)):
3218 for field in self.op.output_fields:
3221 elif field == self._VLD:
3223 elif field == "node_status":
3224 # this is just a copy of the dict
3226 for node_name, nos_list in os_data.items():
3227 val[node_name] = nos_list
3228 elif field == "variants":
3229 val = utils.NiceSort(list(variants))
3230 elif field == "parameters":
3232 elif field == "api_versions":
3233 val = list(api_versions)
3234 elif field == self._HID:
3236 elif field == self._BLK:
3239 raise errors.ParameterError(field)
3246 class LURemoveNode(LogicalUnit):
3247 """Logical unit for removing a node.
3250 HPATH = "node-remove"
3251 HTYPE = constants.HTYPE_NODE
3256 def BuildHooksEnv(self):
3259 This doesn't run on the target node in the pre phase as a failed
3260 node would then be impossible to remove.
3264 "OP_TARGET": self.op.node_name,
3265 "NODE_NAME": self.op.node_name,
3267 all_nodes = self.cfg.GetNodeList()
3269 all_nodes.remove(self.op.node_name)
3271 logging.warning("Node %s which is about to be removed not found"
3272 " in the all nodes list", self.op.node_name)
3273 return env, all_nodes, all_nodes
3275 def CheckPrereq(self):
3276 """Check prerequisites.
3279 - the node exists in the configuration
3280 - it does not have primary or secondary instances
3281 - it's not the master
3283 Any errors are signaled by raising errors.OpPrereqError.
3286 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3287 node = self.cfg.GetNodeInfo(self.op.node_name)
3288 assert node is not None
3290 instance_list = self.cfg.GetInstanceList()
3292 masternode = self.cfg.GetMasterNode()
3293 if node.name == masternode:
3294 raise errors.OpPrereqError("Node is the master node,"
3295 " you need to failover first.",
3298 for instance_name in instance_list:
3299 instance = self.cfg.GetInstanceInfo(instance_name)
3300 if node.name in instance.all_nodes:
3301 raise errors.OpPrereqError("Instance %s is still running on the node,"
3302 " please remove first." % instance_name,
3304 self.op.node_name = node.name
3307 def Exec(self, feedback_fn):
3308 """Removes the node from the cluster.
3312 logging.info("Stopping the node daemon and removing configs from node %s",
3315 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3317 # Promote nodes to master candidate as needed
3318 _AdjustCandidatePool(self, exceptions=[node.name])
3319 self.context.RemoveNode(node.name)
3321 # Run post hooks on the node before it's removed
3322 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3324 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3326 # pylint: disable-msg=W0702
3327 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3329 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3330 msg = result.fail_msg
3332 self.LogWarning("Errors encountered on the remote node while leaving"
3333 " the cluster: %s", msg)
3335 # Remove node from our /etc/hosts
3336 if self.cfg.GetClusterInfo().modify_etc_hosts:
3337 master_node = self.cfg.GetMasterNode()
3338 result = self.rpc.call_etc_hosts_modify(master_node,
3339 constants.ETC_HOSTS_REMOVE,
3341 result.Raise("Can't update hosts file with new host data")
3342 _RedistributeAncillaryFiles(self)
3345 class LUQueryNodes(NoHooksLU):
3346 """Logical unit for querying nodes.
3349 # pylint: disable-msg=W0142
3352 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3353 ("use_locking", False, _TBool),
3357 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3358 "master_candidate", "offline", "drained"]
3360 _FIELDS_DYNAMIC = utils.FieldSet(
3362 "mtotal", "mnode", "mfree",
3364 "ctotal", "cnodes", "csockets",
3367 _FIELDS_STATIC = utils.FieldSet(*[
3368 "pinst_cnt", "sinst_cnt",
3369 "pinst_list", "sinst_list",
3370 "pip", "sip", "tags",
3372 "role"] + _SIMPLE_FIELDS
3375 def CheckArguments(self):
3376 _CheckOutputFields(static=self._FIELDS_STATIC,
3377 dynamic=self._FIELDS_DYNAMIC,
3378 selected=self.op.output_fields)
3380 def ExpandNames(self):
3381 self.needed_locks = {}
3382 self.share_locks[locking.LEVEL_NODE] = 1
3385 self.wanted = _GetWantedNodes(self, self.op.names)
3387 self.wanted = locking.ALL_SET
3389 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3390 self.do_locking = self.do_node_query and self.op.use_locking
3392 # if we don't request only static fields, we need to lock the nodes
3393 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3395 def Exec(self, feedback_fn):
3396 """Computes the list of nodes and their attributes.
3399 all_info = self.cfg.GetAllNodesInfo()
3401 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3402 elif self.wanted != locking.ALL_SET:
3403 nodenames = self.wanted
3404 missing = set(nodenames).difference(all_info.keys())
3406 raise errors.OpExecError(
3407 "Some nodes were removed before retrieving their data: %s" % missing)
3409 nodenames = all_info.keys()
3411 nodenames = utils.NiceSort(nodenames)
3412 nodelist = [all_info[name] for name in nodenames]
3414 # begin data gathering
3416 if self.do_node_query:
3418 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3419 self.cfg.GetHypervisorType())
3420 for name in nodenames:
3421 nodeinfo = node_data[name]
3422 if not nodeinfo.fail_msg and nodeinfo.payload:
3423 nodeinfo = nodeinfo.payload
3424 fn = utils.TryConvert
3426 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3427 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3428 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3429 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3430 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3431 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3432 "bootid": nodeinfo.get('bootid', None),
3433 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3434 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3437 live_data[name] = {}
3439 live_data = dict.fromkeys(nodenames, {})
3441 node_to_primary = dict([(name, set()) for name in nodenames])
3442 node_to_secondary = dict([(name, set()) for name in nodenames])
3444 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3445 "sinst_cnt", "sinst_list"))
3446 if inst_fields & frozenset(self.op.output_fields):
3447 inst_data = self.cfg.GetAllInstancesInfo()
3449 for inst in inst_data.values():
3450 if inst.primary_node in node_to_primary:
3451 node_to_primary[inst.primary_node].add(inst.name)
3452 for secnode in inst.secondary_nodes:
3453 if secnode in node_to_secondary:
3454 node_to_secondary[secnode].add(inst.name)
3456 master_node = self.cfg.GetMasterNode()
3458 # end data gathering
3461 for node in nodelist:
3463 for field in self.op.output_fields:
3464 if field in self._SIMPLE_FIELDS:
3465 val = getattr(node, field)
3466 elif field == "pinst_list":
3467 val = list(node_to_primary[node.name])
3468 elif field == "sinst_list":
3469 val = list(node_to_secondary[node.name])
3470 elif field == "pinst_cnt":
3471 val = len(node_to_primary[node.name])
3472 elif field == "sinst_cnt":
3473 val = len(node_to_secondary[node.name])
3474 elif field == "pip":
3475 val = node.primary_ip
3476 elif field == "sip":
3477 val = node.secondary_ip
3478 elif field == "tags":
3479 val = list(node.GetTags())
3480 elif field == "master":
3481 val = node.name == master_node
3482 elif self._FIELDS_DYNAMIC.Matches(field):
3483 val = live_data[node.name].get(field, None)
3484 elif field == "role":
3485 if node.name == master_node:
3487 elif node.master_candidate:
3496 raise errors.ParameterError(field)
3497 node_output.append(val)
3498 output.append(node_output)
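# Added note: only fields outside _FIELDS_STATIC trigger the RPC-backed
# live_data path (do_node_query) and, together with use_locking, node
# locking; a query restricted to static fields is answered purely from the
# configuration.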
3503 class LUQueryNodeVolumes(NoHooksLU):
3504 """Logical unit for getting volumes on node(s).
3508 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3509 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3512 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3513 _FIELDS_STATIC = utils.FieldSet("node")
3515 def CheckArguments(self):
3516 _CheckOutputFields(static=self._FIELDS_STATIC,
3517 dynamic=self._FIELDS_DYNAMIC,
3518 selected=self.op.output_fields)
3520 def ExpandNames(self):
3521 self.needed_locks = {}
3522 self.share_locks[locking.LEVEL_NODE] = 1
3523 if not self.op.nodes:
3524 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3526 self.needed_locks[locking.LEVEL_NODE] = \
3527 _GetWantedNodes(self, self.op.nodes)
3529 def Exec(self, feedback_fn):
3530 """Computes the list of nodes and their attributes.
3533 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3534 volumes = self.rpc.call_node_volumes(nodenames)
3536 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3537 in self.cfg.GetInstanceList()]
3539 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3542 for node in nodenames:
3543 nresult = volumes[node]
3546 msg = nresult.fail_msg
3548 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3551 node_vols = nresult.payload[:]
3552 node_vols.sort(key=lambda vol: vol['dev'])
3554 for vol in node_vols:
3556 for field in self.op.output_fields:
3559 elif field == "phys":
3563 elif field == "name":
3565 elif field == "size":
3566 val = int(float(vol['size']))
3567 elif field == "instance":
3569 if node not in lv_by_node[inst]:
3571 if vol['name'] in lv_by_node[inst][node]:
3577 raise errors.ParameterError(field)
3578 node_output.append(str(val))
3580 output.append(node_output)
3585 class LUQueryNodeStorage(NoHooksLU):
3586 """Logical unit for getting information on storage units on node(s).
3589 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3591 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3592 ("storage_type", _NoDefault, _CheckStorageType),
3593 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3594 ("name", None, _TMaybeString),
3598 def CheckArguments(self):
3599 _CheckOutputFields(static=self._FIELDS_STATIC,
3600 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3601 selected=self.op.output_fields)
3603 def ExpandNames(self):
3604 self.needed_locks = {}
3605 self.share_locks[locking.LEVEL_NODE] = 1
3608 self.needed_locks[locking.LEVEL_NODE] = \
3609 _GetWantedNodes(self, self.op.nodes)
3611 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3613 def Exec(self, feedback_fn):
3614 """Computes the list of nodes and their attributes.
3617 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3619 # Always get name to sort by
3620 if constants.SF_NAME in self.op.output_fields:
3621 fields = self.op.output_fields[:]
3623 fields = [constants.SF_NAME] + self.op.output_fields
3625 # Never ask for node or type as it's only known to the LU
3626 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3627 while extra in fields:
3628 fields.remove(extra)
3630 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3631 name_idx = field_idx[constants.SF_NAME]
3633 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3634 data = self.rpc.call_storage_list(self.nodes,
3635 self.op.storage_type, st_args,
3636 self.op.name, fields)
3640 for node in utils.NiceSort(self.nodes):
3641 nresult = data[node]
3645 msg = nresult.fail_msg
3647 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3650 rows = dict([(row[name_idx], row) for row in nresult.payload])
3652 for name in utils.NiceSort(rows.keys()):
3657 for field in self.op.output_fields:
3658 if field == constants.SF_NODE:
3660 elif field == constants.SF_TYPE:
3661 val = self.op.storage_type
3662 elif field in field_idx:
3663 val = row[field_idx[field]]
3665 raise errors.ParameterError(field)
3674 class LUModifyNodeStorage(NoHooksLU):
3675 """Logical unit for modifying a storage volume on a node.
3680 ("storage_type", _NoDefault, _CheckStorageType),
3681 ("name", _NoDefault, _TNonEmptyString),
3682 ("changes", _NoDefault, _TDict),
3686 def CheckArguments(self):
3687 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3689 storage_type = self.op.storage_type
3692 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3694 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3695 " modified" % storage_type,
3698 diff = set(self.op.changes.keys()) - modifiable
3700 raise errors.OpPrereqError("The following fields can not be modified for"
3701 " storage units of type '%s': %r" %
3702 (storage_type, list(diff)),
3705 def ExpandNames(self):
3706 self.needed_locks = {
3707 locking.LEVEL_NODE: self.op.node_name,
3710 def Exec(self, feedback_fn):
3711 """Computes the list of nodes and their attributes.
3714 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3715 result = self.rpc.call_storage_modify(self.op.node_name,
3716 self.op.storage_type, st_args,
3717 self.op.name, self.op.changes)
3718 result.Raise("Failed to modify storage unit '%s' on %s" %
3719 (self.op.name, self.op.node_name))
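# Illustrative example (added commentary, assuming the LVM PV backend keeps
# SF_ALLOCATABLE as its only modifiable field): a typical request would pass
#   changes == {constants.SF_ALLOCATABLE: True}
# which the storage_modify RPC above applies to the named unit on the node.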
3722 class LUAddNode(LogicalUnit):
3723 """Logical unit for adding node to the cluster.
3727 HTYPE = constants.HTYPE_NODE
3730 ("primary_ip", None, _NoType),
3731 ("secondary_ip", None, _TMaybeString),
3732 ("readd", False, _TBool),
3733 ("nodegroup", None, _TMaybeString)
3736 def CheckArguments(self):
3737 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3738 # validate/normalize the node name
3739 self.hostname = netutils.GetHostname(name=self.op.node_name,
3740 family=self.primary_ip_family)
3741 self.op.node_name = self.hostname.name
3742 if self.op.readd and self.op.nodegroup:
3743 raise errors.OpPrereqError("Cannot pass a nodegroup when a node is"
3744 " being readded", errors.ECODE_INVAL)
3746 def BuildHooksEnv(self):
3749 This will run on all nodes before, and on all nodes + the new node after.
3753 "OP_TARGET": self.op.node_name,
3754 "NODE_NAME": self.op.node_name,
3755 "NODE_PIP": self.op.primary_ip,
3756 "NODE_SIP": self.op.secondary_ip,
3758 nodes_0 = self.cfg.GetNodeList()
3759 nodes_1 = nodes_0 + [self.op.node_name, ]
3760 return env, nodes_0, nodes_1
3762 def CheckPrereq(self):
3763 """Check prerequisites.
3766 - the new node is not already in the config
3768 - its parameters (single/dual homed) matches the cluster
3770 Any errors are signaled by raising errors.OpPrereqError.
3774 hostname = self.hostname
3775 node = hostname.name
3776 primary_ip = self.op.primary_ip = hostname.ip
3777 if self.op.secondary_ip is None:
3778 if self.primary_ip_family == netutils.IP6Address.family:
3779 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3780 " IPv4 address must be given as secondary",
3782 self.op.secondary_ip = primary_ip
3784 secondary_ip = self.op.secondary_ip
3785 if not netutils.IP4Address.IsValid(secondary_ip):
3786 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3787 " address" % secondary_ip, errors.ECODE_INVAL)
3789 node_list = cfg.GetNodeList()
3790 if not self.op.readd and node in node_list:
3791 raise errors.OpPrereqError("Node %s is already in the configuration" %
3792 node, errors.ECODE_EXISTS)
3793 elif self.op.readd and node not in node_list:
3794 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3797 self.changed_primary_ip = False
3799 for existing_node_name in node_list:
3800 existing_node = cfg.GetNodeInfo(existing_node_name)
3802 if self.op.readd and node == existing_node_name:
3803 if existing_node.secondary_ip != secondary_ip:
3804 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3805 " address configuration as before",
3807 if existing_node.primary_ip != primary_ip:
3808 self.changed_primary_ip = True
3812 if (existing_node.primary_ip == primary_ip or
3813 existing_node.secondary_ip == primary_ip or
3814 existing_node.primary_ip == secondary_ip or
3815 existing_node.secondary_ip == secondary_ip):
3816 raise errors.OpPrereqError("New node ip address(es) conflict with"
3817 " existing node %s" % existing_node.name,
3818 errors.ECODE_NOTUNIQUE)
3820 # check that the type of the node (single versus dual homed) is the
3821 # same as for the master
3822 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3823 master_singlehomed = myself.secondary_ip == myself.primary_ip
3824 newbie_singlehomed = secondary_ip == primary_ip
3825 if master_singlehomed != newbie_singlehomed:
3826 if master_singlehomed:
3827 raise errors.OpPrereqError("The master has no private ip but the"
3828 " new node has one",
3831 raise errors.OpPrereqError("The master has a private ip but the"
3832 " new node doesn't have one",
3835 # checks reachability
3836 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3837 raise errors.OpPrereqError("Node not reachable by ping",
3838 errors.ECODE_ENVIRON)
3840 if not newbie_singlehomed:
3841 # check reachability from my secondary ip to newbie's secondary ip
3842 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3843 source=myself.secondary_ip):
3844 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3845 " based ping to noded port",
3846 errors.ECODE_ENVIRON)
3853 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3856 self.new_node = self.cfg.GetNodeInfo(node)
3857 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3859 nodegroup = cfg.LookupNodeGroup(self.op.nodegroup)
3860 self.new_node = objects.Node(name=node,
3861 primary_ip=primary_ip,
3862 secondary_ip=secondary_ip,
3863 master_candidate=self.master_candidate,
3864 offline=False, drained=False,
3865 nodegroup=nodegroup)
3867 def Exec(self, feedback_fn):
3868 """Adds the new node to the cluster.
3871 new_node = self.new_node
3872 node = new_node.name
3874 # for re-adds, reset the offline/drained/master-candidate flags;
3875 # we need to reset here, otherwise offline would prevent RPC calls
3876 # later in the procedure; this also means that if the re-add
3877 # fails, we are left with a non-offlined, broken node
3879 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3880 self.LogInfo("Readding a node, the offline/drained flags were reset")
3881 # if we demote the node, we do cleanup later in the procedure
3882 new_node.master_candidate = self.master_candidate
3883 if self.changed_primary_ip:
3884 new_node.primary_ip = self.op.primary_ip
3886 # notify the user about any possible mc promotion
3887 if new_node.master_candidate:
3888 self.LogInfo("Node will be a master candidate")
3890 # check connectivity
3891 result = self.rpc.call_version([node])[node]
3892 result.Raise("Can't get version information from node %s" % node)
3893 if constants.PROTOCOL_VERSION == result.payload:
3894 logging.info("Communication to node %s fine, sw version %s match",
3895 node, result.payload)
3897 raise errors.OpExecError("Version mismatch master version %s,"
3898 " node version %s" %
3899 (constants.PROTOCOL_VERSION, result.payload))
3901 # Add node to our /etc/hosts, and add key to known_hosts
3902 if self.cfg.GetClusterInfo().modify_etc_hosts:
3903 master_node = self.cfg.GetMasterNode()
3904 result = self.rpc.call_etc_hosts_modify(master_node,
3905 constants.ETC_HOSTS_ADD,
3908 result.Raise("Can't update hosts file with new host data")
3910 if new_node.secondary_ip != new_node.primary_ip:
3911 result = self.rpc.call_node_has_ip_address(new_node.name,
3912 new_node.secondary_ip)
3913 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3914 prereq=True, ecode=errors.ECODE_ENVIRON)
3915 if not result.payload:
3916 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3917 " you gave (%s). Please fix and re-run this"
3918 " command." % new_node.secondary_ip)
3920 node_verify_list = [self.cfg.GetMasterNode()]
3921 node_verify_param = {
3922 constants.NV_NODELIST: [node],
3923 # TODO: do a node-net-test as well?
3926 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3927 self.cfg.GetClusterName())
3928 for verifier in node_verify_list:
3929 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3930 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3932 for failed in nl_payload:
3933 feedback_fn("ssh/hostname verification failed"
3934 " (checking from %s): %s" %
3935 (verifier, nl_payload[failed]))
3936 raise errors.OpExecError("ssh/hostname verification failed.")
3939 _RedistributeAncillaryFiles(self)
3940 self.context.ReaddNode(new_node)
3941 # make sure we redistribute the config
3942 self.cfg.Update(new_node, feedback_fn)
3943 # and make sure the new node will not have old files around
3944 if not new_node.master_candidate:
3945 result = self.rpc.call_node_demote_from_mc(new_node.name)
3946 msg = result.fail_msg
3948 self.LogWarning("Node failed to demote itself from master"
3949 " candidate status: %s" % msg)
3951 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3952 self.context.AddNode(new_node, self.proc.GetECId())
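# A minimal sketch of the equivalent CLI invocations (node names and the
# secondary IP are hypothetical); "-s" supplies the secondary IP on
# dual-homed clusters and "--readd" re-adds a node already in the config:
#   gnt-node add node4.example.com
#   gnt-node add -s 192.0.2.14 node4.example.com
#   gnt-node add --readd node4.example.com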
3955 class LUSetNodeParams(LogicalUnit):
3956 """Modifies the parameters of a node.
3959 HPATH = "node-modify"
3960 HTYPE = constants.HTYPE_NODE
3963 ("master_candidate", None, _TMaybeBool),
3964 ("offline", None, _TMaybeBool),
3965 ("drained", None, _TMaybeBool),
3966 ("auto_promote", False, _TBool),
3971 def CheckArguments(self):
3972 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3973 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3974 if all_mods.count(None) == 3:
3975 raise errors.OpPrereqError("Please pass at least one modification",
3977 if all_mods.count(True) > 1:
3978 raise errors.OpPrereqError("Can't set the node into more than one"
3979 " state at the same time",
3982 # Boolean value that tells us whether we're offlining or draining the node
3983 self.offline_or_drain = (self.op.offline == True or
3984 self.op.drained == True)
3985 self.deoffline_or_drain = (self.op.offline == False or
3986 self.op.drained == False)
3987 self.might_demote = (self.op.master_candidate == False or
3988 self.offline_or_drain)
3990 self.lock_all = self.op.auto_promote and self.might_demote
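# Worked example of the flags above (values assumed for illustration):
# with op.offline=True, op.master_candidate=None, op.drained=None and
# op.auto_promote=True we get offline_or_drain=True,
# deoffline_or_drain=False, might_demote=True and therefore lock_all=True,
# i.e. all node locks are taken so the candidate pool can be adjusted
# after the demotion.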
3993 def ExpandNames(self):
3995 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3997 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3999 def BuildHooksEnv(self):
4002 This runs on the master node.
4006 "OP_TARGET": self.op.node_name,
4007 "MASTER_CANDIDATE": str(self.op.master_candidate),
4008 "OFFLINE": str(self.op.offline),
4009 "DRAINED": str(self.op.drained),
4011 nl = [self.cfg.GetMasterNode(),
4015 def CheckPrereq(self):
4016 """Check prerequisites.
4018 This checks the node's current role against the requested flag changes.
4021 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4023 if (self.op.master_candidate is not None or
4024 self.op.drained is not None or
4025 self.op.offline is not None):
4026 # we can't change the master's node flags
4027 if self.op.node_name == self.cfg.GetMasterNode():
4028 raise errors.OpPrereqError("The master role can be changed"
4029 " only via master-failover",
4033 if node.master_candidate and self.might_demote and not self.lock_all:
4034 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4035 # check if after removing the current node, we're missing master
4037 (mc_remaining, mc_should, _) = \
4038 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4039 if mc_remaining < mc_should:
4040 raise errors.OpPrereqError("Not enough master candidates, please"
4041 " pass auto_promote to allow promotion",
4044 if (self.op.master_candidate == True and
4045 ((node.offline and not self.op.offline == False) or
4046 (node.drained and not self.op.drained == False))):
4047 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4048 " to master_candidate" % node.name,
4051 # If we're being deofflined/drained, we'll MC ourself if needed
4052 if (self.deoffline_or_drain and not self.offline_or_drain and not
4053 self.op.master_candidate == True and not node.master_candidate):
4054 self.op.master_candidate = _DecideSelfPromotion(self)
4055 if self.op.master_candidate:
4056 self.LogInfo("Autopromoting node to master candidate")
4060 def Exec(self, feedback_fn):
4069 if self.op.offline is not None:
4070 node.offline = self.op.offline
4071 result.append(("offline", str(self.op.offline)))
4072 if self.op.offline == True:
4073 if node.master_candidate:
4074 node.master_candidate = False
4076 result.append(("master_candidate", "auto-demotion due to offline"))
4078 node.drained = False
4079 result.append(("drained", "clear drained status due to offline"))
4081 if self.op.master_candidate is not None:
4082 node.master_candidate = self.op.master_candidate
4084 result.append(("master_candidate", str(self.op.master_candidate)))
4085 if self.op.master_candidate == False:
4086 rrc = self.rpc.call_node_demote_from_mc(node.name)
4089 self.LogWarning("Node failed to demote itself: %s" % msg)
4091 if self.op.drained is not None:
4092 node.drained = self.op.drained
4093 result.append(("drained", str(self.op.drained)))
4094 if self.op.drained == True:
4095 if node.master_candidate:
4096 node.master_candidate = False
4098 result.append(("master_candidate", "auto-demotion due to drain"))
4099 rrc = self.rpc.call_node_demote_from_mc(node.name)
4102 self.LogWarning("Node failed to demote itself: %s" % msg)
4104 node.offline = False
4105 result.append(("offline", "clear offline status due to drain"))
4107 # we locked all nodes, we adjust the CP before updating this node
4109 _AdjustCandidatePool(self, [node.name])
4111 # this will trigger configuration file update, if needed
4112 self.cfg.Update(node, feedback_fn)
4114 # this will trigger job queue propagation or cleanup
4116 self.context.ReaddNode(node)
4121 class LUPowercycleNode(NoHooksLU):
4122 """Powercycles a node.
4131 def CheckArguments(self):
4132 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4133 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4134 raise errors.OpPrereqError("The node is the master and the force"
4135 " parameter was not set",
4138 def ExpandNames(self):
4139 """Locking for PowercycleNode.
4141 This is a last-resort option and shouldn't block on other
4142 jobs. Therefore, we grab no locks.
4145 self.needed_locks = {}
4147 def Exec(self, feedback_fn):
4151 result = self.rpc.call_node_powercycle(self.op.node_name,
4152 self.cfg.GetHypervisorType())
4153 result.Raise("Failed to schedule the reboot")
4154 return result.payload
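# A minimal sketch of the equivalent CLI call (node name hypothetical):
#   gnt-node powercycle node2.example.com
# Powercycling the master node additionally requires the force parameter,
# as enforced in CheckArguments above.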
4157 class LUQueryClusterInfo(NoHooksLU):
4158 """Query cluster configuration.
4163 def ExpandNames(self):
4164 self.needed_locks = {}
4166 def Exec(self, feedback_fn):
4167 """Return cluster config.
4170 cluster = self.cfg.GetClusterInfo()
4173 # Filter just for enabled hypervisors
4174 for os_name, hv_dict in cluster.os_hvp.items():
4175 os_hvp[os_name] = {}
4176 for hv_name, hv_params in hv_dict.items():
4177 if hv_name in cluster.enabled_hypervisors:
4178 os_hvp[os_name][hv_name] = hv_params
4180 # Convert ip_family to ip_version
4181 primary_ip_version = constants.IP4_VERSION
4182 if cluster.primary_ip_family == netutils.IP6Address.family:
4183 primary_ip_version = constants.IP6_VERSION
4186 "software_version": constants.RELEASE_VERSION,
4187 "protocol_version": constants.PROTOCOL_VERSION,
4188 "config_version": constants.CONFIG_VERSION,
4189 "os_api_version": max(constants.OS_API_VERSIONS),
4190 "export_version": constants.EXPORT_VERSION,
4191 "architecture": (platform.architecture()[0], platform.machine()),
4192 "name": cluster.cluster_name,
4193 "master": cluster.master_node,
4194 "default_hypervisor": cluster.enabled_hypervisors[0],
4195 "enabled_hypervisors": cluster.enabled_hypervisors,
4196 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4197 for hypervisor_name in cluster.enabled_hypervisors]),
4199 "beparams": cluster.beparams,
4200 "osparams": cluster.osparams,
4201 "nicparams": cluster.nicparams,
4202 "candidate_pool_size": cluster.candidate_pool_size,
4203 "master_netdev": cluster.master_netdev,
4204 "volume_group_name": cluster.volume_group_name,
4205 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4206 "file_storage_dir": cluster.file_storage_dir,
4207 "maintain_node_health": cluster.maintain_node_health,
4208 "ctime": cluster.ctime,
4209 "mtime": cluster.mtime,
4210 "uuid": cluster.uuid,
4211 "tags": list(cluster.GetTags()),
4212 "uid_pool": cluster.uid_pool,
4213 "default_iallocator": cluster.default_iallocator,
4214 "reserved_lvs": cluster.reserved_lvs,
4215 "primary_ip_version": primary_ip_version,
4221 class LUQueryConfigValues(NoHooksLU):
4222 """Return configuration values.
4225 _OP_PARAMS = [_POutputFields]
4227 _FIELDS_DYNAMIC = utils.FieldSet()
4228 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4229 "watcher_pause", "volume_group_name")
4231 def CheckArguments(self):
4232 _CheckOutputFields(static=self._FIELDS_STATIC,
4233 dynamic=self._FIELDS_DYNAMIC,
4234 selected=self.op.output_fields)
4236 def ExpandNames(self):
4237 self.needed_locks = {}
4239 def Exec(self, feedback_fn):
4240 """Dump a representation of the cluster config to the standard output.
4244 for field in self.op.output_fields:
4245 if field == "cluster_name":
4246 entry = self.cfg.GetClusterName()
4247 elif field == "master_node":
4248 entry = self.cfg.GetMasterNode()
4249 elif field == "drain_flag":
4250 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4251 elif field == "watcher_pause":
4252 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4253 elif field == "volume_group_name":
4254 entry = self.cfg.GetVGName()
4256 raise errors.ParameterError(field)
4257 values.append(entry)
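# Illustrative result for a query such as
# op.output_fields = ["cluster_name", "master_node"] on a hypothetical
# cluster: ["cluster.example.com", "node1.example.com"], i.e. one entry
# per requested field, in the requested order.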
4261 class LUActivateInstanceDisks(NoHooksLU):
4262 """Bring up an instance's disks.
4267 ("ignore_size", False, _TBool),
4271 def ExpandNames(self):
4272 self._ExpandAndLockInstance()
4273 self.needed_locks[locking.LEVEL_NODE] = []
4274 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4276 def DeclareLocks(self, level):
4277 if level == locking.LEVEL_NODE:
4278 self._LockInstancesNodes()
4280 def CheckPrereq(self):
4281 """Check prerequisites.
4283 This checks that the instance is in the cluster.
4286 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4287 assert self.instance is not None, \
4288 "Cannot retrieve locked instance %s" % self.op.instance_name
4289 _CheckNodeOnline(self, self.instance.primary_node)
4291 def Exec(self, feedback_fn):
4292 """Activate the disks.
4295 disks_ok, disks_info = \
4296 _AssembleInstanceDisks(self, self.instance,
4297 ignore_size=self.op.ignore_size)
4299 raise errors.OpExecError("Cannot activate block devices")
4304 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4306 """Prepare the block devices for an instance.
4308 This sets up the block devices on all nodes.
4310 @type lu: L{LogicalUnit}
4311 @param lu: the logical unit on whose behalf we execute
4312 @type instance: L{objects.Instance}
4313 @param instance: the instance for whose disks we assemble
4314 @type disks: list of L{objects.Disk} or None
4315 @param disks: which disks to assemble (or all, if None)
4316 @type ignore_secondaries: boolean
4317 @param ignore_secondaries: if true, errors on secondary nodes
4318 won't result in an error return from the function
4319 @type ignore_size: boolean
4320 @param ignore_size: if true, the current known size of the disk
4321 will not be used during the disk activation, useful for cases
4322 when the size is wrong
4323 @return: False if the operation failed, otherwise a list of
4324 (host, instance_visible_name, node_visible_name)
4325 with the mapping from node devices to instance devices
4330 iname = instance.name
4331 disks = _ExpandCheckDisks(instance, disks)
4333 # With the two-pass mechanism we try to reduce the window of
4334 # opportunity for the race condition of switching DRBD to primary
4335 # before handshaking has occurred, but we do not eliminate it
4337 # The proper fix would be to wait (with some limits) until the
4338 # connection has been made and drbd transitions from WFConnection
4339 # into any other network-connected state (Connected, SyncTarget,
4342 # 1st pass, assemble on all nodes in secondary mode
4343 for inst_disk in disks:
4344 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4346 node_disk = node_disk.Copy()
4347 node_disk.UnsetSize()
4348 lu.cfg.SetDiskID(node_disk, node)
4349 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4350 msg = result.fail_msg
4352 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4353 " (is_primary=False, pass=1): %s",
4354 inst_disk.iv_name, node, msg)
4355 if not ignore_secondaries:
4358 # FIXME: race condition on drbd migration to primary
4360 # 2nd pass, do only the primary node
4361 for inst_disk in disks:
4364 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4365 if node != instance.primary_node:
4368 node_disk = node_disk.Copy()
4369 node_disk.UnsetSize()
4370 lu.cfg.SetDiskID(node_disk, node)
4371 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4372 msg = result.fail_msg
4374 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4375 " (is_primary=True, pass=2): %s",
4376 inst_disk.iv_name, node, msg)
4379 dev_path = result.payload
4381 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4383 # leave the disks configured for the primary node
4384 # this is a workaround that would be fixed better by
4385 # improving the logical/physical id handling
4387 lu.cfg.SetDiskID(disk, instance.primary_node)
4389 return disks_ok, device_info
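# A minimal usage sketch (the LU and instance objects are assumed to
# already exist, mirroring LUActivateInstanceDisks above):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance,
#                                                  ignore_size=True)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
# On success, device_info holds (primary_node, iv_name, device_path)
# tuples describing the assembled devices on the primary node.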
4392 def _StartInstanceDisks(lu, instance, force):
4393 """Start the disks of an instance.
4396 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4397 ignore_secondaries=force)
4399 _ShutdownInstanceDisks(lu, instance)
4400 if force is not None and not force:
4401 lu.proc.LogWarning("", hint="If the message above refers to a"
4403 " you can retry the operation using '--force'.")
4404 raise errors.OpExecError("Disk consistency error")
4407 class LUDeactivateInstanceDisks(NoHooksLU):
4408 """Shutdown an instance's disks.
4416 def ExpandNames(self):
4417 self._ExpandAndLockInstance()
4418 self.needed_locks[locking.LEVEL_NODE] = []
4419 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4421 def DeclareLocks(self, level):
4422 if level == locking.LEVEL_NODE:
4423 self._LockInstancesNodes()
4425 def CheckPrereq(self):
4426 """Check prerequisites.
4428 This checks that the instance is in the cluster.
4431 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4432 assert self.instance is not None, \
4433 "Cannot retrieve locked instance %s" % self.op.instance_name
4435 def Exec(self, feedback_fn):
4436 """Deactivate the disks
4439 instance = self.instance
4440 _SafeShutdownInstanceDisks(self, instance)
4443 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4444 """Shutdown block devices of an instance.
4446 This function checks if an instance is running, before calling
4447 _ShutdownInstanceDisks.
4450 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4451 _ShutdownInstanceDisks(lu, instance, disks=disks)
4454 def _ExpandCheckDisks(instance, disks):
4455 """Return the instance disks selected by the disks list
4457 @type disks: list of L{objects.Disk} or None
4458 @param disks: selected disks
4459 @rtype: list of L{objects.Disk}
4460 @return: selected instance disks to act on
4464 return instance.disks
4466 if not set(disks).issubset(instance.disks):
4467 raise errors.ProgrammerError("Can only act on disks belonging to the"
4472 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4473 """Shutdown block devices of an instance.
4475 This does the shutdown on all nodes of the instance.
4477 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed.
4482 disks = _ExpandCheckDisks(instance, disks)
4485 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4486 lu.cfg.SetDiskID(top_disk, node)
4487 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4488 msg = result.fail_msg
4490 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4491 disk.iv_name, node, msg)
4492 if not ignore_primary or node != instance.primary_node:
4497 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4498 """Checks if a node has enough free memory.
4500 This function checks if a given node has the needed amount of free
4501 memory. In case the node has less memory or we cannot get the
4502 information from the node, this function raises an OpPrereqError
4505 @type lu: C{LogicalUnit}
4506 @param lu: a logical unit from which we get configuration data
4508 @param node: the node to check
4509 @type reason: C{str}
4510 @param reason: string to use in the error message
4511 @type requested: C{int}
4512 @param requested: the amount of memory in MiB to check for
4513 @type hypervisor_name: C{str}
4514 @param hypervisor_name: the hypervisor to ask for memory stats
4515 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4516 we cannot check the node
4519 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4520 nodeinfo[node].Raise("Can't get data from node %s" % node,
4521 prereq=True, ecode=errors.ECODE_ENVIRON)
4522 free_mem = nodeinfo[node].payload.get('memory_free', None)
4523 if not isinstance(free_mem, int):
4524 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4525 " was '%s'" % (node, free_mem),
4526 errors.ECODE_ENVIRON)
4527 if requested > free_mem:
4528 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4529 " needed %s MiB, available %s MiB" %
4530 (node, reason, requested, free_mem),
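# A minimal usage sketch with assumed values ("lu" is a LogicalUnit; the
# node name and hypervisor are hypothetical):
#   _CheckNodeFreeMemory(lu, "node3.example.com",
#                        "starting instance inst1.example.com",
#                        2048, "xen-pvm")
# This raises OpPrereqError unless node3 reports at least 2048 MiB of
# free memory for the xen-pvm hypervisor.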
4534 def _CheckNodesFreeDisk(lu, nodenames, requested):
4535 """Checks if nodes have enough free disk space in the default VG.
4537 This function checks if all given nodes have the needed amount of
4538 free disk. In case any node has less disk or we cannot get the
4539 information from the node, this function raises an OpPrereqError
4542 @type lu: C{LogicalUnit}
4543 @param lu: a logical unit from which we get configuration data
4544 @type nodenames: C{list}
4545 @param nodenames: the list of node names to check
4546 @type requested: C{int}
4547 @param requested: the amount of disk in MiB to check for
4548 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4549 we cannot check the node
4552 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4553 lu.cfg.GetHypervisorType())
4554 for node in nodenames:
4555 info = nodeinfo[node]
4556 info.Raise("Cannot get current information from node %s" % node,
4557 prereq=True, ecode=errors.ECODE_ENVIRON)
4558 vg_free = info.payload.get("vg_free", None)
4559 if not isinstance(vg_free, int):
4560 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4561 " result was '%s'" % (node, vg_free),
4562 errors.ECODE_ENVIRON)
4563 if requested > vg_free:
4564 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4565 " required %d MiB, available %d MiB" %
4566 (node, requested, vg_free),
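# A minimal usage sketch with assumed values (node names hypothetical):
#   _CheckNodesFreeDisk(lu, ["node1.example.com", "node2.example.com"],
#                       10240)
# This raises OpPrereqError if either node has less than 10 GiB free in
# the default volume group.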
4570 class LUStartupInstance(LogicalUnit):
4571 """Starts an instance.
4574 HPATH = "instance-start"
4575 HTYPE = constants.HTYPE_INSTANCE
4579 ("hvparams", _EmptyDict, _TDict),
4580 ("beparams", _EmptyDict, _TDict),
4584 def CheckArguments(self):
4586 if self.op.beparams:
4587 # fill the beparams dict
4588 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4590 def ExpandNames(self):
4591 self._ExpandAndLockInstance()
4593 def BuildHooksEnv(self):
4596 This runs on master, primary and secondary nodes of the instance.
4600 "FORCE": self.op.force,
4602 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4603 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4606 def CheckPrereq(self):
4607 """Check prerequisites.
4609 This checks that the instance is in the cluster.
4612 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4613 assert self.instance is not None, \
4614 "Cannot retrieve locked instance %s" % self.op.instance_name
4617 if self.op.hvparams:
4618 # check hypervisor parameter syntax (locally)
4619 cluster = self.cfg.GetClusterInfo()
4620 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4621 filled_hvp = cluster.FillHV(instance)
4622 filled_hvp.update(self.op.hvparams)
4623 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4624 hv_type.CheckParameterSyntax(filled_hvp)
4625 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4627 _CheckNodeOnline(self, instance.primary_node)
4629 bep = self.cfg.GetClusterInfo().FillBE(instance)
4630 # check bridges existence
4631 _CheckInstanceBridgesExist(self, instance)
4633 remote_info = self.rpc.call_instance_info(instance.primary_node,
4635 instance.hypervisor)
4636 remote_info.Raise("Error checking node %s" % instance.primary_node,
4637 prereq=True, ecode=errors.ECODE_ENVIRON)
4638 if not remote_info.payload: # not running already
4639 _CheckNodeFreeMemory(self, instance.primary_node,
4640 "starting instance %s" % instance.name,
4641 bep[constants.BE_MEMORY], instance.hypervisor)
4643 def Exec(self, feedback_fn):
4644 """Start the instance.
4647 instance = self.instance
4648 force = self.op.force
4650 self.cfg.MarkInstanceUp(instance.name)
4652 node_current = instance.primary_node
4654 _StartInstanceDisks(self, instance, force)
4656 result = self.rpc.call_instance_start(node_current, instance,
4657 self.op.hvparams, self.op.beparams)
4658 msg = result.fail_msg
4660 _ShutdownInstanceDisks(self, instance)
4661 raise errors.OpExecError("Could not start instance: %s" % msg)
4664 class LURebootInstance(LogicalUnit):
4665 """Reboot an instance.
4668 HPATH = "instance-reboot"
4669 HTYPE = constants.HTYPE_INSTANCE
4672 ("ignore_secondaries", False, _TBool),
4673 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4678 def ExpandNames(self):
4679 self._ExpandAndLockInstance()
4681 def BuildHooksEnv(self):
4684 This runs on master, primary and secondary nodes of the instance.
4688 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4689 "REBOOT_TYPE": self.op.reboot_type,
4690 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4692 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4693 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4696 def CheckPrereq(self):
4697 """Check prerequisites.
4699 This checks that the instance is in the cluster.
4702 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4703 assert self.instance is not None, \
4704 "Cannot retrieve locked instance %s" % self.op.instance_name
4706 _CheckNodeOnline(self, instance.primary_node)
4708 # check bridges existence
4709 _CheckInstanceBridgesExist(self, instance)
4711 def Exec(self, feedback_fn):
4712 """Reboot the instance.
4715 instance = self.instance
4716 ignore_secondaries = self.op.ignore_secondaries
4717 reboot_type = self.op.reboot_type
4719 node_current = instance.primary_node
4721 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4722 constants.INSTANCE_REBOOT_HARD]:
4723 for disk in instance.disks:
4724 self.cfg.SetDiskID(disk, node_current)
4725 result = self.rpc.call_instance_reboot(node_current, instance,
4727 self.op.shutdown_timeout)
4728 result.Raise("Could not reboot instance")
4730 result = self.rpc.call_instance_shutdown(node_current, instance,
4731 self.op.shutdown_timeout)
4732 result.Raise("Could not shutdown instance for full reboot")
4733 _ShutdownInstanceDisks(self, instance)
4734 _StartInstanceDisks(self, instance, ignore_secondaries)
4735 result = self.rpc.call_instance_start(node_current, instance, None, None)
4736 msg = result.fail_msg
4738 _ShutdownInstanceDisks(self, instance)
4739 raise errors.OpExecError("Could not start instance for"
4740 " full reboot: %s" % msg)
4742 self.cfg.MarkInstanceUp(instance.name)
4745 class LUShutdownInstance(LogicalUnit):
4746 """Shutdown an instance.
4749 HPATH = "instance-stop"
4750 HTYPE = constants.HTYPE_INSTANCE
4753 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4757 def ExpandNames(self):
4758 self._ExpandAndLockInstance()
4760 def BuildHooksEnv(self):
4763 This runs on master, primary and secondary nodes of the instance.
4766 env = _BuildInstanceHookEnvByObject(self, self.instance)
4767 env["TIMEOUT"] = self.op.timeout
4768 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4771 def CheckPrereq(self):
4772 """Check prerequisites.
4774 This checks that the instance is in the cluster.
4777 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4778 assert self.instance is not None, \
4779 "Cannot retrieve locked instance %s" % self.op.instance_name
4780 _CheckNodeOnline(self, self.instance.primary_node)
4782 def Exec(self, feedback_fn):
4783 """Shutdown the instance.
4786 instance = self.instance
4787 node_current = instance.primary_node
4788 timeout = self.op.timeout
4789 self.cfg.MarkInstanceDown(instance.name)
4790 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4791 msg = result.fail_msg
4793 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4795 _ShutdownInstanceDisks(self, instance)
4798 class LUReinstallInstance(LogicalUnit):
4799 """Reinstall an instance.
4802 HPATH = "instance-reinstall"
4803 HTYPE = constants.HTYPE_INSTANCE
4806 ("os_type", None, _TMaybeString),
4807 ("force_variant", False, _TBool),
4811 def ExpandNames(self):
4812 self._ExpandAndLockInstance()
4814 def BuildHooksEnv(self):
4817 This runs on master, primary and secondary nodes of the instance.
4820 env = _BuildInstanceHookEnvByObject(self, self.instance)
4821 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4824 def CheckPrereq(self):
4825 """Check prerequisites.
4827 This checks that the instance is in the cluster and is not running.
4830 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4831 assert instance is not None, \
4832 "Cannot retrieve locked instance %s" % self.op.instance_name
4833 _CheckNodeOnline(self, instance.primary_node)
4835 if instance.disk_template == constants.DT_DISKLESS:
4836 raise errors.OpPrereqError("Instance '%s' has no disks" %
4837 self.op.instance_name,
4839 _CheckInstanceDown(self, instance, "cannot reinstall")
4841 if self.op.os_type is not None:
4843 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4844 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4846 self.instance = instance
4848 def Exec(self, feedback_fn):
4849 """Reinstall the instance.
4852 inst = self.instance
4854 if self.op.os_type is not None:
4855 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4856 inst.os = self.op.os_type
4857 self.cfg.Update(inst, feedback_fn)
4859 _StartInstanceDisks(self, inst, None)
4861 feedback_fn("Running the instance OS create scripts...")
4862 # FIXME: pass debug option from opcode to backend
4863 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4864 self.op.debug_level)
4865 result.Raise("Could not install OS for instance %s on node %s" %
4866 (inst.name, inst.primary_node))
4868 _ShutdownInstanceDisks(self, inst)
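# A sketch of the equivalent CLI call (instance and OS names are
# hypothetical); "-o" selects the new os_type handled above:
#   gnt-instance reinstall -o debootstrap+default inst2.example.com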
4871 class LURecreateInstanceDisks(LogicalUnit):
4872 """Recreate an instance's missing disks.
4875 HPATH = "instance-recreate-disks"
4876 HTYPE = constants.HTYPE_INSTANCE
4879 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4883 def ExpandNames(self):
4884 self._ExpandAndLockInstance()
4886 def BuildHooksEnv(self):
4889 This runs on master, primary and secondary nodes of the instance.
4892 env = _BuildInstanceHookEnvByObject(self, self.instance)
4893 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4896 def CheckPrereq(self):
4897 """Check prerequisites.
4899 This checks that the instance is in the cluster and is not running.
4902 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4903 assert instance is not None, \
4904 "Cannot retrieve locked instance %s" % self.op.instance_name
4905 _CheckNodeOnline(self, instance.primary_node)
4907 if instance.disk_template == constants.DT_DISKLESS:
4908 raise errors.OpPrereqError("Instance '%s' has no disks" %
4909 self.op.instance_name, errors.ECODE_INVAL)
4910 _CheckInstanceDown(self, instance, "cannot recreate disks")
4912 if not self.op.disks:
4913 self.op.disks = range(len(instance.disks))
4915 for idx in self.op.disks:
4916 if idx >= len(instance.disks):
4917 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4920 self.instance = instance
4922 def Exec(self, feedback_fn):
4923 """Recreate the disks.
4927 for idx, _ in enumerate(self.instance.disks):
4928 if idx not in self.op.disks: # disk idx has not been passed in
4932 _CreateDisks(self, self.instance, to_skip=to_skip)
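# Worked example: for an instance with three disks and op.disks = [1],
# the loop above yields to_skip = [0, 2], so only disk 1 is recreated.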
4935 class LURenameInstance(LogicalUnit):
4936 """Rename an instance.
4939 HPATH = "instance-rename"
4940 HTYPE = constants.HTYPE_INSTANCE
4943 ("new_name", _NoDefault, _TNonEmptyString),
4944 ("ip_check", False, _TBool),
4945 ("name_check", True, _TBool),
4948 def CheckArguments(self):
4952 if self.op.ip_check and not self.op.name_check:
4953 # TODO: make the ip check more flexible and not depend on the name check
4954 raise errors.OpPrereqError("Cannot do ip check without a name check",
4957 def BuildHooksEnv(self):
4960 This runs on master, primary and secondary nodes of the instance.
4963 env = _BuildInstanceHookEnvByObject(self, self.instance)
4964 env["INSTANCE_NEW_NAME"] = self.op.new_name
4965 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4968 def CheckPrereq(self):
4969 """Check prerequisites.
4971 This checks that the instance is in the cluster and is not running.
4974 self.op.instance_name = _ExpandInstanceName(self.cfg,
4975 self.op.instance_name)
4976 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4977 assert instance is not None
4978 _CheckNodeOnline(self, instance.primary_node)
4979 _CheckInstanceDown(self, instance, "cannot rename")
4980 self.instance = instance
4982 new_name = self.op.new_name
4983 if self.op.name_check:
4984 hostname = netutils.GetHostname(name=new_name)
4985 new_name = self.op.new_name = hostname.name
4986 if (self.op.ip_check and
4987 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4988 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4989 (hostname.ip, new_name),
4990 errors.ECODE_NOTUNIQUE)
4992 instance_list = self.cfg.GetInstanceList()
4993 if new_name in instance_list:
4994 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4995 new_name, errors.ECODE_EXISTS)
4997 def Exec(self, feedback_fn):
4998 """Reinstall the instance.
5001 inst = self.instance
5002 old_name = inst.name
5004 if inst.disk_template == constants.DT_FILE:
5005 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5007 self.cfg.RenameInstance(inst.name, self.op.new_name)
5008 # Change the instance lock. This is definitely safe while we hold the BGL
5009 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5010 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5012 # re-read the instance from the configuration after rename
5013 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5015 if inst.disk_template == constants.DT_FILE:
5016 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5017 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5018 old_file_storage_dir,
5019 new_file_storage_dir)
5020 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5021 " (but the instance has been renamed in Ganeti)" %
5022 (inst.primary_node, old_file_storage_dir,
5023 new_file_storage_dir))
5025 _StartInstanceDisks(self, inst, None)
5027 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5028 old_name, self.op.debug_level)
5029 msg = result.fail_msg
5031 msg = ("Could not run OS rename script for instance %s on node %s"
5032 " (but the instance has been renamed in Ganeti): %s" %
5033 (inst.name, inst.primary_node, msg))
5034 self.proc.LogWarning(msg)
5036 _ShutdownInstanceDisks(self, inst)
5041 class LURemoveInstance(LogicalUnit):
5042 """Remove an instance.
5045 HPATH = "instance-remove"
5046 HTYPE = constants.HTYPE_INSTANCE
5049 ("ignore_failures", False, _TBool),
5054 def ExpandNames(self):
5055 self._ExpandAndLockInstance()
5056 self.needed_locks[locking.LEVEL_NODE] = []
5057 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5059 def DeclareLocks(self, level):
5060 if level == locking.LEVEL_NODE:
5061 self._LockInstancesNodes()
5063 def BuildHooksEnv(self):
5066 This runs on master, primary and secondary nodes of the instance.
5069 env = _BuildInstanceHookEnvByObject(self, self.instance)
5070 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5071 nl = [self.cfg.GetMasterNode()]
5072 nl_post = list(self.instance.all_nodes) + nl
5073 return env, nl, nl_post
5075 def CheckPrereq(self):
5076 """Check prerequisites.
5078 This checks that the instance is in the cluster.
5081 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5082 assert self.instance is not None, \
5083 "Cannot retrieve locked instance %s" % self.op.instance_name
5085 def Exec(self, feedback_fn):
5086 """Remove the instance.
5089 instance = self.instance
5090 logging.info("Shutting down instance %s on node %s",
5091 instance.name, instance.primary_node)
5093 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5094 self.op.shutdown_timeout)
5095 msg = result.fail_msg
5097 if self.op.ignore_failures:
5098 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5100 raise errors.OpExecError("Could not shutdown instance %s on"
5102 (instance.name, instance.primary_node, msg))
5104 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5107 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5108 """Utility function to remove an instance.
5111 logging.info("Removing block devices for instance %s", instance.name)
5113 if not _RemoveDisks(lu, instance):
5114 if not ignore_failures:
5115 raise errors.OpExecError("Can't remove instance's disks")
5116 feedback_fn("Warning: can't remove instance's disks")
5118 logging.info("Removing instance %s out of cluster config", instance.name)
5120 lu.cfg.RemoveInstance(instance.name)
5122 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5123 "Instance lock removal conflict"
5125 # Remove lock for the instance
5126 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5129 class LUQueryInstances(NoHooksLU):
5130 """Logical unit for querying instances.
5133 # pylint: disable-msg=W0142
5135 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5136 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5137 ("use_locking", False, _TBool),
5140 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5141 "serial_no", "ctime", "mtime", "uuid"]
5142 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5144 "disk_template", "ip", "mac", "bridge",
5145 "nic_mode", "nic_link",
5146 "sda_size", "sdb_size", "vcpus", "tags",
5147 "network_port", "beparams",
5148 r"(disk)\.(size)/([0-9]+)",
5149 r"(disk)\.(sizes)", "disk_usage",
5150 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5151 r"(nic)\.(bridge)/([0-9]+)",
5152 r"(nic)\.(macs|ips|modes|links|bridges)",
5153 r"(disk|nic)\.(count)",
5155 ] + _SIMPLE_FIELDS +
5157 for name in constants.HVS_PARAMETERS
5158 if name not in constants.HVC_GLOBALS] +
5160 for name in constants.BES_PARAMETERS])
5161 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5167 def CheckArguments(self):
5168 _CheckOutputFields(static=self._FIELDS_STATIC,
5169 dynamic=self._FIELDS_DYNAMIC,
5170 selected=self.op.output_fields)
5172 def ExpandNames(self):
5173 self.needed_locks = {}
5174 self.share_locks[locking.LEVEL_INSTANCE] = 1
5175 self.share_locks[locking.LEVEL_NODE] = 1
5178 self.wanted = _GetWantedInstances(self, self.op.names)
5180 self.wanted = locking.ALL_SET
5182 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5183 self.do_locking = self.do_node_query and self.op.use_locking
5185 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5186 self.needed_locks[locking.LEVEL_NODE] = []
5187 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5189 def DeclareLocks(self, level):
5190 if level == locking.LEVEL_NODE and self.do_locking:
5191 self._LockInstancesNodes()
5193 def Exec(self, feedback_fn):
5194 """Computes the list of nodes and their attributes.
5197 # pylint: disable-msg=R0912
5198 # way too many branches here
5199 all_info = self.cfg.GetAllInstancesInfo()
5200 if self.wanted == locking.ALL_SET:
5201 # caller didn't specify instance names, so ordering is not important
5203 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5205 instance_names = all_info.keys()
5206 instance_names = utils.NiceSort(instance_names)
5208 # caller did specify names, so we must keep the ordering
5210 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5212 tgt_set = all_info.keys()
5213 missing = set(self.wanted).difference(tgt_set)
5215 raise errors.OpExecError("Some instances were removed before"
5216 " retrieving their data: %s" % missing)
5217 instance_names = self.wanted
5219 instance_list = [all_info[iname] for iname in instance_names]
5221 # begin data gathering
5223 nodes = frozenset([inst.primary_node for inst in instance_list])
5224 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5228 if self.do_node_query:
5230 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5232 result = node_data[name]
5234 # offline nodes will be in both lists
5235 off_nodes.append(name)
5237 bad_nodes.append(name)
5240 live_data.update(result.payload)
5241 # else no instance is alive
5243 live_data = dict([(name, {}) for name in instance_names])
5245 # end data gathering
5250 cluster = self.cfg.GetClusterInfo()
5251 for instance in instance_list:
5253 i_hv = cluster.FillHV(instance, skip_globals=True)
5254 i_be = cluster.FillBE(instance)
5255 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5256 for field in self.op.output_fields:
5257 st_match = self._FIELDS_STATIC.Matches(field)
5258 if field in self._SIMPLE_FIELDS:
5259 val = getattr(instance, field)
5260 elif field == "pnode":
5261 val = instance.primary_node
5262 elif field == "snodes":
5263 val = list(instance.secondary_nodes)
5264 elif field == "admin_state":
5265 val = instance.admin_up
5266 elif field == "oper_state":
5267 if instance.primary_node in bad_nodes:
5270 val = bool(live_data.get(instance.name))
5271 elif field == "status":
5272 if instance.primary_node in off_nodes:
5273 val = "ERROR_nodeoffline"
5274 elif instance.primary_node in bad_nodes:
5275 val = "ERROR_nodedown"
5277 running = bool(live_data.get(instance.name))
5279 if instance.admin_up:
5284 if instance.admin_up:
5288 elif field == "oper_ram":
5289 if instance.primary_node in bad_nodes:
5291 elif instance.name in live_data:
5292 val = live_data[instance.name].get("memory", "?")
5295 elif field == "oper_vcpus":
5296 if instance.primary_node in bad_nodes:
5298 elif instance.name in live_data:
5299 val = live_data[instance.name].get("vcpus", "?")
5302 elif field == "vcpus":
5303 val = i_be[constants.BE_VCPUS]
5304 elif field == "disk_template":
5305 val = instance.disk_template
5308 val = instance.nics[0].ip
5311 elif field == "nic_mode":
5313 val = i_nicp[0][constants.NIC_MODE]
5316 elif field == "nic_link":
5318 val = i_nicp[0][constants.NIC_LINK]
5321 elif field == "bridge":
5322 if (instance.nics and
5323 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5324 val = i_nicp[0][constants.NIC_LINK]
5327 elif field == "mac":
5329 val = instance.nics[0].mac
5332 elif field == "sda_size" or field == "sdb_size":
5333 idx = ord(field[2]) - ord('a')
5335 val = instance.FindDisk(idx).size
5336 except errors.OpPrereqError:
5338 elif field == "disk_usage": # total disk usage per node
5339 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5340 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5341 elif field == "tags":
5342 val = list(instance.GetTags())
5343 elif field == "hvparams":
5345 elif (field.startswith(HVPREFIX) and
5346 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5347 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5348 val = i_hv.get(field[len(HVPREFIX):], None)
5349 elif field == "beparams":
5351 elif (field.startswith(BEPREFIX) and
5352 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5353 val = i_be.get(field[len(BEPREFIX):], None)
5354 elif st_match and st_match.groups():
5355 # matches a variable list
5356 st_groups = st_match.groups()
5357 if st_groups and st_groups[0] == "disk":
5358 if st_groups[1] == "count":
5359 val = len(instance.disks)
5360 elif st_groups[1] == "sizes":
5361 val = [disk.size for disk in instance.disks]
5362 elif st_groups[1] == "size":
5364 val = instance.FindDisk(st_groups[2]).size
5365 except errors.OpPrereqError:
5368 assert False, "Unhandled disk parameter"
5369 elif st_groups[0] == "nic":
5370 if st_groups[1] == "count":
5371 val = len(instance.nics)
5372 elif st_groups[1] == "macs":
5373 val = [nic.mac for nic in instance.nics]
5374 elif st_groups[1] == "ips":
5375 val = [nic.ip for nic in instance.nics]
5376 elif st_groups[1] == "modes":
5377 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5378 elif st_groups[1] == "links":
5379 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5380 elif st_groups[1] == "bridges":
5383 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5384 val.append(nicp[constants.NIC_LINK])
5389 nic_idx = int(st_groups[2])
5390 if nic_idx >= len(instance.nics):
5393 if st_groups[1] == "mac":
5394 val = instance.nics[nic_idx].mac
5395 elif st_groups[1] == "ip":
5396 val = instance.nics[nic_idx].ip
5397 elif st_groups[1] == "mode":
5398 val = i_nicp[nic_idx][constants.NIC_MODE]
5399 elif st_groups[1] == "link":
5400 val = i_nicp[nic_idx][constants.NIC_LINK]
5401 elif st_groups[1] == "bridge":
5402 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5403 if nic_mode == constants.NIC_MODE_BRIDGED:
5404 val = i_nicp[nic_idx][constants.NIC_LINK]
5408 assert False, "Unhandled NIC parameter"
5410 assert False, ("Declared but unhandled variable parameter '%s'" %
5413 assert False, "Declared but unhandled parameter '%s'" % field
5420 class LUFailoverInstance(LogicalUnit):
5421 """Failover an instance.
5424 HPATH = "instance-failover"
5425 HTYPE = constants.HTYPE_INSTANCE
5428 ("ignore_consistency", False, _TBool),
5433 def ExpandNames(self):
5434 self._ExpandAndLockInstance()
5435 self.needed_locks[locking.LEVEL_NODE] = []
5436 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5438 def DeclareLocks(self, level):
5439 if level == locking.LEVEL_NODE:
5440 self._LockInstancesNodes()
5442 def BuildHooksEnv(self):
5445 This runs on master, primary and secondary nodes of the instance.
5448 instance = self.instance
5449 source_node = instance.primary_node
5450 target_node = instance.secondary_nodes[0]
5452 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5453 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5454 "OLD_PRIMARY": source_node,
5455 "OLD_SECONDARY": target_node,
5456 "NEW_PRIMARY": target_node,
5457 "NEW_SECONDARY": source_node,
5459 env.update(_BuildInstanceHookEnvByObject(self, instance))
5460 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5462 nl_post.append(source_node)
5463 return env, nl, nl_post
5465 def CheckPrereq(self):
5466 """Check prerequisites.
5468 This checks that the instance is in the cluster.
5471 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5472 assert self.instance is not None, \
5473 "Cannot retrieve locked instance %s" % self.op.instance_name
5475 bep = self.cfg.GetClusterInfo().FillBE(instance)
5476 if instance.disk_template not in constants.DTS_NET_MIRROR:
5477 raise errors.OpPrereqError("Instance's disk layout is not"
5478 " network mirrored, cannot failover.",
5481 secondary_nodes = instance.secondary_nodes
5482 if not secondary_nodes:
5483 raise errors.ProgrammerError("no secondary node but using "
5484 "a mirrored disk template")
5486 target_node = secondary_nodes[0]
5487 _CheckNodeOnline(self, target_node)
5488 _CheckNodeNotDrained(self, target_node)
5489 if instance.admin_up:
5490 # check memory requirements on the secondary node
5491 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5492 instance.name, bep[constants.BE_MEMORY],
5493 instance.hypervisor)
5495 self.LogInfo("Not checking memory on the secondary node as"
5496 " instance will not be started")
5498 # check bridge existence
5499 _CheckInstanceBridgesExist(self, instance, node=target_node)
5501 def Exec(self, feedback_fn):
5502 """Failover an instance.
5504 The failover is done by shutting it down on its present node and
5505 starting it on the secondary.
5508 instance = self.instance
5509 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5511 source_node = instance.primary_node
5512 target_node = instance.secondary_nodes[0]
5514 if instance.admin_up:
5515 feedback_fn("* checking disk consistency between source and target")
5516 for dev in instance.disks:
5517 # for drbd, these are drbd over lvm
5518 if not _CheckDiskConsistency(self, dev, target_node, False):
5519 if not self.op.ignore_consistency:
5520 raise errors.OpExecError("Disk %s is degraded on target node,"
5521 " aborting failover." % dev.iv_name)
5523 feedback_fn("* not checking disk consistency as instance is not running")
5525 feedback_fn("* shutting down instance on source node")
5526 logging.info("Shutting down instance %s on node %s",
5527 instance.name, source_node)
5529 result = self.rpc.call_instance_shutdown(source_node, instance,
5530 self.op.shutdown_timeout)
5531 msg = result.fail_msg
5533 if self.op.ignore_consistency or primary_node.offline:
5534 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5535 " Proceeding anyway. Please make sure node"
5536 " %s is down. Error details: %s",
5537 instance.name, source_node, source_node, msg)
5539 raise errors.OpExecError("Could not shutdown instance %s on"
5541 (instance.name, source_node, msg))
5543 feedback_fn("* deactivating the instance's disks on source node")
5544 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5545 raise errors.OpExecError("Can't shut down the instance's disks.")
5547 instance.primary_node = target_node
5548 # distribute new instance config to the other nodes
5549 self.cfg.Update(instance, feedback_fn)
5551 # Only start the instance if it's marked as up
5552 if instance.admin_up:
5553 feedback_fn("* activating the instance's disks on target node")
5554 logging.info("Starting instance %s on node %s",
5555 instance.name, target_node)
5557 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5558 ignore_secondaries=True)
5560 _ShutdownInstanceDisks(self, instance)
5561 raise errors.OpExecError("Can't activate the instance's disks")
5563 feedback_fn("* starting the instance on the target node")
5564 result = self.rpc.call_instance_start(target_node, instance, None, None)
5565 msg = result.fail_msg
5567 _ShutdownInstanceDisks(self, instance)
5568 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5569 (instance.name, target_node, msg))
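# A sketch of the equivalent CLI call (instance name hypothetical);
# --ignore-consistency maps to the flag checked at the top of Exec:
#   gnt-instance failover --ignore-consistency inst3.example.com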
5572 class LUMigrateInstance(LogicalUnit):
5573 """Migrate an instance.
5575 This is migration without shutting down the instance; by contrast,
5576 failover shuts the instance down first.
5579 HPATH = "instance-migrate"
5580 HTYPE = constants.HTYPE_INSTANCE
5585 ("cleanup", False, _TBool),
5590 def ExpandNames(self):
5591 self._ExpandAndLockInstance()
5593 self.needed_locks[locking.LEVEL_NODE] = []
5594 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5596 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5598 self.tasklets = [self._migrater]
5600 def DeclareLocks(self, level):
5601 if level == locking.LEVEL_NODE:
5602 self._LockInstancesNodes()
5604 def BuildHooksEnv(self):
5607 This runs on master, primary and secondary nodes of the instance.
5610 instance = self._migrater.instance
5611 source_node = instance.primary_node
5612 target_node = instance.secondary_nodes[0]
5613 env = _BuildInstanceHookEnvByObject(self, instance)
5614 env["MIGRATE_LIVE"] = self._migrater.live
5615 env["MIGRATE_CLEANUP"] = self.op.cleanup
5617 "OLD_PRIMARY": source_node,
5618 "OLD_SECONDARY": target_node,
5619 "NEW_PRIMARY": target_node,
5620 "NEW_SECONDARY": source_node,
5622 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5624 nl_post.append(source_node)
5625 return env, nl, nl_post
5628 class LUMoveInstance(LogicalUnit):
5629 """Move an instance by data-copying.
5632 HPATH = "instance-move"
5633 HTYPE = constants.HTYPE_INSTANCE
5636 ("target_node", _NoDefault, _TNonEmptyString),
5641 def ExpandNames(self):
5642 self._ExpandAndLockInstance()
5643 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5644 self.op.target_node = target_node
5645 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5646 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5648 def DeclareLocks(self, level):
5649 if level == locking.LEVEL_NODE:
5650 self._LockInstancesNodes(primary_only=True)
5652 def BuildHooksEnv(self):
5655 This runs on master, primary and secondary nodes of the instance.
5659 "TARGET_NODE": self.op.target_node,
5660 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5662 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5663 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5664 self.op.target_node]
5667 def CheckPrereq(self):
5668 """Check prerequisites.
5670 This checks that the instance is in the cluster.
5673 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5674 assert self.instance is not None, \
5675 "Cannot retrieve locked instance %s" % self.op.instance_name
5677 node = self.cfg.GetNodeInfo(self.op.target_node)
5678 assert node is not None, \
5679 "Cannot retrieve locked node %s" % self.op.target_node
5681 self.target_node = target_node = node.name
5683 if target_node == instance.primary_node:
5684 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5685 (instance.name, target_node),
5688 bep = self.cfg.GetClusterInfo().FillBE(instance)
5690 for idx, dsk in enumerate(instance.disks):
5691 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5692 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5693 " cannot copy" % idx, errors.ECODE_STATE)
5695 _CheckNodeOnline(self, target_node)
5696 _CheckNodeNotDrained(self, target_node)
5698 if instance.admin_up:
5699 # check memory requirements on the target node
5700 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5701 instance.name, bep[constants.BE_MEMORY],
5702 instance.hypervisor)
5704 self.LogInfo("Not checking memory on the secondary node as"
5705 " instance will not be started")
5707 # check bridge existence
5708 _CheckInstanceBridgesExist(self, instance, node=target_node)
5710 def Exec(self, feedback_fn):
5711 """Move an instance.
5713 The move is done by shutting it down on its present node, copying
5714 the data over (slow) and starting it on the new node.
5717 instance = self.instance
5719 source_node = instance.primary_node
5720 target_node = self.target_node
5722 self.LogInfo("Shutting down instance %s on source node %s",
5723 instance.name, source_node)
5725 result = self.rpc.call_instance_shutdown(source_node, instance,
5726 self.op.shutdown_timeout)
5727 msg = result.fail_msg
5729 if self.op.ignore_consistency:
5730 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5731 " Proceeding anyway. Please make sure node"
5732 " %s is down. Error details: %s",
5733 instance.name, source_node, source_node, msg)
5735 raise errors.OpExecError("Could not shutdown instance %s on"
5737 (instance.name, source_node, msg))
5739 # create the target disks
5741 _CreateDisks(self, instance, target_node=target_node)
5742 except errors.OpExecError:
5743 self.LogWarning("Device creation failed, reverting...")
5745 _RemoveDisks(self, instance, target_node=target_node)
5747 self.cfg.ReleaseDRBDMinors(instance.name)
5750 cluster_name = self.cfg.GetClusterInfo().cluster_name
5753 # activate, get path, copy the data over
5754 for idx, disk in enumerate(instance.disks):
5755 self.LogInfo("Copying data for disk %d", idx)
5756 result = self.rpc.call_blockdev_assemble(target_node, disk,
5757 instance.name, True)
5759 self.LogWarning("Can't assemble newly created disk %d: %s",
5760 idx, result.fail_msg)
5761 errs.append(result.fail_msg)
5763 dev_path = result.payload
5764 result = self.rpc.call_blockdev_export(source_node, disk,
5765 target_node, dev_path,
5768 self.LogWarning("Can't copy data over for disk %d: %s",
5769 idx, result.fail_msg)
5770 errs.append(result.fail_msg)
5774 self.LogWarning("Some disks failed to copy, aborting")
5776 _RemoveDisks(self, instance, target_node=target_node)
5778 self.cfg.ReleaseDRBDMinors(instance.name)
5779 raise errors.OpExecError("Errors during disk copy: %s" %
5782 instance.primary_node = target_node
5783 self.cfg.Update(instance, feedback_fn)
5785 self.LogInfo("Removing the disks on the original node")
5786 _RemoveDisks(self, instance, target_node=source_node)
5788 # Only start the instance if it's marked as up
5789 if instance.admin_up:
5790 self.LogInfo("Starting instance %s on node %s",
5791 instance.name, target_node)
5793 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5794 ignore_secondaries=True)
5796 _ShutdownInstanceDisks(self, instance)
5797 raise errors.OpExecError("Can't activate the instance's disks")
5799 result = self.rpc.call_instance_start(target_node, instance, None, None)
5800 msg = result.fail_msg
5802 _ShutdownInstanceDisks(self, instance)
5803 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5804 (instance.name, target_node, msg))
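# A sketch of the equivalent CLI call (names hypothetical); "-n" selects
# the target_node used throughout this LU:
#   gnt-instance move -n node4.example.com inst4.example.com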
5807 class LUMigrateNode(LogicalUnit):
5808 """Migrate all instances from a node.
5811 HPATH = "node-migrate"
5812 HTYPE = constants.HTYPE_NODE
5820 def ExpandNames(self):
5821 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5823 self.needed_locks = {
5824 locking.LEVEL_NODE: [self.op.node_name],
5827 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5829 # Create tasklets for migrating all primary instances on this node
5833 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5834 logging.debug("Migrating instance %s", inst.name)
5835 names.append(inst.name)
5837 tasklets.append(TLMigrateInstance(self, inst.name, False))
5839 self.tasklets = tasklets
5841 # Declare instance locks
5842 self.needed_locks[locking.LEVEL_INSTANCE] = names
5844 def DeclareLocks(self, level):
5845 if level == locking.LEVEL_NODE:
5846 self._LockInstancesNodes()
5848 def BuildHooksEnv(self):
5851 This runs on the master, the primary and all the secondaries.
5855 "NODE_NAME": self.op.node_name,
5858 nl = [self.cfg.GetMasterNode()]
5860 return (env, nl, nl)
5863 class TLMigrateInstance(Tasklet):
5864 """Tasklet class for instance migration.
5867 @ivar live: whether the migration will be done live or non-live;
5868 this variable is initialized only after CheckPrereq has run
5871 def __init__(self, lu, instance_name, cleanup):
5872 """Initializes this class.
5875 Tasklet.__init__(self, lu)
5878 self.instance_name = instance_name
5879 self.cleanup = cleanup
5880 self.live = False # will be overridden later
5882 def CheckPrereq(self):
5883 """Check prerequisites.
5885 This checks that the instance is in the cluster.
5888 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5889 instance = self.cfg.GetInstanceInfo(instance_name)
5890 assert instance is not None
5892 if instance.disk_template != constants.DT_DRBD8:
5893 raise errors.OpPrereqError("Instance's disk layout is not"
5894 " drbd8, cannot migrate.", errors.ECODE_STATE)
5896 secondary_nodes = instance.secondary_nodes
5897 if not secondary_nodes:
5898 raise errors.ConfigurationError("No secondary node but using"
5899 " drbd8 disk template")
5901 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5903 target_node = secondary_nodes[0]
5904 # check memory requirements on the secondary node
5905 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5906 instance.name, i_be[constants.BE_MEMORY],
5907 instance.hypervisor)
5909 # check bridge existence
5910 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5912 if not self.cleanup:
5913 _CheckNodeNotDrained(self.lu, target_node)
5914 result = self.rpc.call_instance_migratable(instance.primary_node,
5916 result.Raise("Can't migrate, please use failover",
5917 prereq=True, ecode=errors.ECODE_STATE)
5919 self.instance = instance
5921 if self.lu.op.live is not None and self.lu.op.mode is not None:
5922 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5923 " parameters are accepted",
5925 if self.lu.op.live is not None:
5927 self.lu.op.mode = constants.HT_MIGRATION_LIVE
5929 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5930 # reset the 'live' parameter to None so that repeated
5931 # invocations of CheckPrereq do not raise an exception
5932 self.lu.op.live = None
5933 elif self.lu.op.mode is None:
5934 # read the default value from the hypervisor
5935 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5936 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5938 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
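# Illustrative sketch (editorial addition, not part of the original code) of
# how the 'live' and 'mode' opcode parameters are reconciled above; only one
# of them may be given, and 'live' is translated into 'mode' and then cleared:
#
#   op.live=True,  op.mode=None -> op.mode=HT_MIGRATION_LIVE,    self.live=True
#   op.live=False, op.mode=None -> op.mode=HT_MIGRATION_NONLIVE, self.live=False
#   op.live=None,  op.mode=None -> op.mode taken from the hypervisor's
#                                  HV_MIGRATION_MODE default
#   op.live set and op.mode set -> OpPrereqError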
5940 def _WaitUntilSync(self):
5941 """Poll with custom rpc for disk sync.
5943 This uses our own step-based rpc call.
5946 self.feedback_fn("* wait until resync is done")
5950 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5952 self.instance.disks)
5954 for node, nres in result.items():
5955 nres.Raise("Cannot resync disks on node %s" % node)
5956 node_done, node_percent = nres.payload
5957 all_done = all_done and node_done
5958 if node_percent is not None:
5959 min_percent = min(min_percent, node_percent)
5961 if min_percent < 100:
5962 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5965 def _EnsureSecondary(self, node):
5966 """Demote a node to secondary.
5969 self.feedback_fn("* switching node %s to secondary mode" % node)
5971 for dev in self.instance.disks:
5972 self.cfg.SetDiskID(dev, node)
5974 result = self.rpc.call_blockdev_close(node, self.instance.name,
5975 self.instance.disks)
5976 result.Raise("Cannot change disk to secondary on node %s" % node)
5978 def _GoStandalone(self):
5979 """Disconnect from the network.
5982 self.feedback_fn("* changing into standalone mode")
5983 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5984 self.instance.disks)
5985 for node, nres in result.items():
5986 nres.Raise("Cannot disconnect disks node %s" % node)
5988 def _GoReconnect(self, multimaster):
5989 """Reconnect to the network.
5995 msg = "single-master"
5996 self.feedback_fn("* changing disks into %s mode" % msg)
5997 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5998 self.instance.disks,
5999 self.instance.name, multimaster)
6000 for node, nres in result.items():
6001 nres.Raise("Cannot change disks config on node %s" % node)
6003 def _ExecCleanup(self):
6004 """Try to cleanup after a failed migration.
6006 The cleanup is done by:
6007 - check that the instance is running only on one node
6008 (and update the config if needed)
6009 - change disks on its secondary node to secondary
6010 - wait until disks are fully synchronized
6011 - disconnect from the network
6012 - change disks into single-master mode
6013 - wait again until disks are fully synchronized
6016 instance = self.instance
6017 target_node = self.target_node
6018 source_node = self.source_node
6020 # check running on only one node
6021 self.feedback_fn("* checking where the instance actually runs"
6022 " (if this hangs, the hypervisor might be in"
6024 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6025 for node, result in ins_l.items():
6026 result.Raise("Can't contact node %s" % node)
6028 runningon_source = instance.name in ins_l[source_node].payload
6029 runningon_target = instance.name in ins_l[target_node].payload
6031 if runningon_source and runningon_target:
6032 raise errors.OpExecError("Instance seems to be running on two nodes,"
6033 " or the hypervisor is confused. You will have"
6034 " to ensure manually that it runs only on one"
6035 " and restart this operation.")
6037 if not (runningon_source or runningon_target):
6038 raise errors.OpExecError("Instance does not seem to be running at all."
6039 " In this case, it's safer to repair by"
6040 " running 'gnt-instance stop' to ensure disk"
6041 " shutdown, and then restarting it.")
6043 if runningon_target:
6044 # the migration has actually succeeded, we need to update the config
6045 self.feedback_fn("* instance running on secondary node (%s),"
6046 " updating config" % target_node)
6047 instance.primary_node = target_node
6048 self.cfg.Update(instance, self.feedback_fn)
6049 demoted_node = source_node
6051 self.feedback_fn("* instance confirmed to be running on its"
6052 " primary node (%s)" % source_node)
6053 demoted_node = target_node
6055 self._EnsureSecondary(demoted_node)
6057 self._WaitUntilSync()
6058 except errors.OpExecError:
6059 # we ignore errors here, since if the device is standalone, it
6060 # won't be able to sync
6062 self._GoStandalone()
6063 self._GoReconnect(False)
6064 self._WaitUntilSync()
6066 self.feedback_fn("* done")
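# Hedged usage example (illustrative values): _ExecCleanup is reached when the
# tasklet is created with cleanup=True, which in practice corresponds to
# something like "gnt-instance migrate --cleanup <instance>":
#
#   tl = TLMigrateInstance(lu, "inst1.example.com", True)  # cleanup mode
#   tl.CheckPrereq()
#   tl.Exec(feedback_fn)  # dispatches to _ExecCleanup() instead of
#                         # _ExecMigration()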
6068 def _RevertDiskStatus(self):
6069 """Try to revert the disk status after a failed migration.
6072 target_node = self.target_node
6074 self._EnsureSecondary(target_node)
6075 self._GoStandalone()
6076 self._GoReconnect(False)
6077 self._WaitUntilSync()
6078 except errors.OpExecError, err:
6079 self.lu.LogWarning("Migration failed and I can't reconnect the"
6080 " drives: error '%s'\n"
6081 "Please look and recover the instance status" %
6084 def _AbortMigration(self):
6085 """Call the hypervisor code to abort a started migration.
6088 instance = self.instance
6089 target_node = self.target_node
6090 migration_info = self.migration_info
6092 abort_result = self.rpc.call_finalize_migration(target_node,
6096 abort_msg = abort_result.fail_msg
6098 logging.error("Aborting migration failed on target node %s: %s",
6099 target_node, abort_msg)
6100 # Don't raise an exception here, as we still have to try to revert the
6101 # disk status, even if this step failed.
6103 def _ExecMigration(self):
6104 """Migrate an instance.
6106 The migration is done by:
6107 - change the disks into dual-master mode
6108 - wait until disks are fully synchronized again
6109 - migrate the instance
6110 - change disks on the new secondary node (the old primary) to secondary
6111 - wait until disks are fully synchronized
6112 - change disks into single-master mode
6115 instance = self.instance
6116 target_node = self.target_node
6117 source_node = self.source_node
6119 self.feedback_fn("* checking disk consistency between source and target")
6120 for dev in instance.disks:
6121 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6122 raise errors.OpExecError("Disk %s is degraded or not fully"
6123 " synchronized on target node,"
6124 " aborting migrate." % dev.iv_name)
6126 # First get the migration information from the remote node
6127 result = self.rpc.call_migration_info(source_node, instance)
6128 msg = result.fail_msg
6130 log_err = ("Failed fetching source migration information from %s: %s" %
6132 logging.error(log_err)
6133 raise errors.OpExecError(log_err)
6135 self.migration_info = migration_info = result.payload
6137 # Then switch the disks to master/master mode
6138 self._EnsureSecondary(target_node)
6139 self._GoStandalone()
6140 self._GoReconnect(True)
6141 self._WaitUntilSync()
6143 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6144 result = self.rpc.call_accept_instance(target_node,
6147 self.nodes_ip[target_node])
6149 msg = result.fail_msg
6151 logging.error("Instance pre-migration failed, trying to revert"
6152 " disk status: %s", msg)
6153 self.feedback_fn("Pre-migration failed, aborting")
6154 self._AbortMigration()
6155 self._RevertDiskStatus()
6156 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6157 (instance.name, msg))
6159 self.feedback_fn("* migrating instance to %s" % target_node)
6161 result = self.rpc.call_instance_migrate(source_node, instance,
6162 self.nodes_ip[target_node],
6164 msg = result.fail_msg
6166 logging.error("Instance migration failed, trying to revert"
6167 " disk status: %s", msg)
6168 self.feedback_fn("Migration failed, aborting")
6169 self._AbortMigration()
6170 self._RevertDiskStatus()
6171 raise errors.OpExecError("Could not migrate instance %s: %s" %
6172 (instance.name, msg))
6175 instance.primary_node = target_node
6176 # distribute new instance config to the other nodes
6177 self.cfg.Update(instance, self.feedback_fn)
6179 result = self.rpc.call_finalize_migration(target_node,
6183 msg = result.fail_msg
6185 logging.error("Instance migration succeeded, but finalization failed:"
6187 raise errors.OpExecError("Could not finalize instance migration: %s" %
6190 self._EnsureSecondary(source_node)
6191 self._WaitUntilSync()
6192 self._GoStandalone()
6193 self._GoReconnect(False)
6194 self._WaitUntilSync()
6196 self.feedback_fn("* done")
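# Rough sketch (editorial, assuming a healthy two-node DRBD8 setup) of the
# disk-mode transitions driven by _ExecMigration above:
#
#   single-master -> standalone -> dual-master    (before the migration)
#   ... live/non-live migration runs ...
#   dual-master   -> standalone -> single-master  (after the migration)
#
# with _WaitUntilSync() called after each reconnect so the disks are fully
# synchronised before the next step.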
6198 def Exec(self, feedback_fn):
6199 """Perform the migration.
6202 feedback_fn("Migrating instance %s" % self.instance.name)
6204 self.feedback_fn = feedback_fn
6206 self.source_node = self.instance.primary_node
6207 self.target_node = self.instance.secondary_nodes[0]
6208 self.all_nodes = [self.source_node, self.target_node]
6210 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6211 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6215 return self._ExecCleanup()
6217 return self._ExecMigration()
6220 def _CreateBlockDev(lu, node, instance, device, force_create,
6222 """Create a tree of block devices on a given node.
6224 If this device type has to be created on secondaries, create it and
6227 If not, just recurse to children keeping the same 'force' value.
6229 @param lu: the lu on whose behalf we execute
6230 @param node: the node on which to create the device
6231 @type instance: L{objects.Instance}
6232 @param instance: the instance which owns the device
6233 @type device: L{objects.Disk}
6234 @param device: the device to create
6235 @type force_create: boolean
6236 @param force_create: whether to force creation of this device; this
6237 will be changed to True whenever we find a device which has the
6238 CreateOnSecondary() attribute
6239 @param info: the extra 'metadata' we should attach to the device
6240 (this will be represented as a LVM tag)
6241 @type force_open: boolean
6242 @param force_open: this parameter will be passed to the
6243 L{backend.BlockdevCreate} function where it specifies
6244 whether we run on primary or not, and it affects both
6245 the child assembly and the device's own Open() execution
6248 if device.CreateOnSecondary():
6252 for child in device.children:
6253 _CreateBlockDev(lu, node, instance, child, force_create,
6256 if not force_create:
6259 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
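# Illustrative example (editorial, not from the original source): for a DRBD8
# disk with two LV children, the recursion above does roughly the following on
# a secondary node, where the DRBD layer forces creation of its children:
#
#   _CreateBlockDev(lu, node, inst, drbd_disk, force_create=False, info, False)
#     -> both children (data LV, meta LV) are created with force_create=True
#     -> the drbd_disk itself is then created via _CreateSingleBlockDev()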
6262 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6263 """Create a single block device on a given node.
6265 This will not recurse over children of the device, so they must be
6268 @param lu: the lu on whose behalf we execute
6269 @param node: the node on which to create the device
6270 @type instance: L{objects.Instance}
6271 @param instance: the instance which owns the device
6272 @type device: L{objects.Disk}
6273 @param device: the device to create
6274 @param info: the extra 'metadata' we should attach to the device
6275 (this will be represented as a LVM tag)
6276 @type force_open: boolean
6277 @param force_open: this parameter will be passed to the
6278 L{backend.BlockdevCreate} function where it specifies
6279 whether we run on primary or not, and it affects both
6280 the child assembly and the device's own Open() execution
6283 lu.cfg.SetDiskID(device, node)
6284 result = lu.rpc.call_blockdev_create(node, device, device.size,
6285 instance.name, force_open, info)
6286 result.Raise("Can't create block device %s on"
6287 " node %s for instance %s" % (device, node, instance.name))
6288 if device.physical_id is None:
6289 device.physical_id = result.payload
6292 def _GenerateUniqueNames(lu, exts):
6293 """Generate a suitable LV name.
6295 This will generate a logical volume name for the given instance.
6300 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6301 results.append("%s%s" % (new_id, val))
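# Hedged example of the names generated here: for exts [".disk0", ".disk1"]
# the result is a list of the form
#
#   ["<unique-id>.disk0", "<unique-id>.disk1"]
#
# i.e. one cluster-unique ID per extension, reserved against the current
# execution context ID so parallel jobs cannot clash.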
6305 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6307 """Generate a drbd8 device complete with its children.
6310 port = lu.cfg.AllocatePort()
6311 vgname = lu.cfg.GetVGName()
6312 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6313 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6314 logical_id=(vgname, names[0]))
6315 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6316 logical_id=(vgname, names[1]))
6317 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6318 logical_id=(primary, secondary, port,
6321 children=[dev_data, dev_meta],
6326 def _GenerateDiskTemplate(lu, template_name,
6327 instance_name, primary_node,
6328 secondary_nodes, disk_info,
6329 file_storage_dir, file_driver,
6331 """Generate the entire disk layout for a given template type.
6334 # TODO: compute space requirements
6336 vgname = lu.cfg.GetVGName()
6337 disk_count = len(disk_info)
6339 if template_name == constants.DT_DISKLESS:
6341 elif template_name == constants.DT_PLAIN:
6342 if len(secondary_nodes) != 0:
6343 raise errors.ProgrammerError("Wrong template configuration")
6345 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6346 for i in range(disk_count)])
6347 for idx, disk in enumerate(disk_info):
6348 disk_index = idx + base_index
6349 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6350 logical_id=(vgname, names[idx]),
6351 iv_name="disk/%d" % disk_index,
6353 disks.append(disk_dev)
6354 elif template_name == constants.DT_DRBD8:
6355 if len(secondary_nodes) != 1:
6356 raise errors.ProgrammerError("Wrong template configuration")
6357 remote_node = secondary_nodes[0]
6358 minors = lu.cfg.AllocateDRBDMinor(
6359 [primary_node, remote_node] * len(disk_info), instance_name)
6362 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6363 for i in range(disk_count)]):
6364 names.append(lv_prefix + "_data")
6365 names.append(lv_prefix + "_meta")
6366 for idx, disk in enumerate(disk_info):
6367 disk_index = idx + base_index
6368 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6369 disk["size"], names[idx*2:idx*2+2],
6370 "disk/%d" % disk_index,
6371 minors[idx*2], minors[idx*2+1])
6372 disk_dev.mode = disk["mode"]
6373 disks.append(disk_dev)
6374 elif template_name == constants.DT_FILE:
6375 if len(secondary_nodes) != 0:
6376 raise errors.ProgrammerError("Wrong template configuration")
6378 _RequireFileStorage()
6380 for idx, disk in enumerate(disk_info):
6381 disk_index = idx + base_index
6382 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6383 iv_name="disk/%d" % disk_index,
6384 logical_id=(file_driver,
6385 "%s/disk%d" % (file_storage_dir,
6388 disks.append(disk_dev)
6390 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
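# Illustrative sketch of the function's output (values are made up): for
# template_name=constants.DT_DRBD8 and disk_info=[{"size": 1024, "mode": "rw"}]
# the result contains one objects.Disk of type LD_DRBD8 whose children are two
# LD_LV disks (the 1024 MB data LV and the 128 MB metadata LV), named
# "<unique-id>.disk0_data" / "<unique-id>.disk0_meta" and wired to the DRBD
# minors allocated on the primary and secondary nodes.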
6394 def _GetInstanceInfoText(instance):
6395 Compute the text that should be added to the disk's metadata.
6398 return "originstname+%s" % instance.name
6401 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6402 """Create all disks for an instance.
6404 This abstracts away some work from AddInstance.
6406 @type lu: L{LogicalUnit}
6407 @param lu: the logical unit on whose behalf we execute
6408 @type instance: L{objects.Instance}
6409 @param instance: the instance whose disks we should create
6411 @param to_skip: list of indices to skip
6412 @type target_node: string
6413 @param target_node: if passed, overrides the target node for creation
6415 @return: the success of the creation
6418 info = _GetInstanceInfoText(instance)
6419 if target_node is None:
6420 pnode = instance.primary_node
6421 all_nodes = instance.all_nodes
6426 if instance.disk_template == constants.DT_FILE:
6427 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6428 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6430 result.Raise("Failed to create directory '%s' on"
6431 " node %s" % (file_storage_dir, pnode))
6433 # Note: this needs to be kept in sync with adding of disks in
6434 # LUSetInstanceParams
6435 for idx, device in enumerate(instance.disks):
6436 if to_skip and idx in to_skip:
6438 logging.info("Creating volume %s for instance %s",
6439 device.iv_name, instance.name)
6441 for node in all_nodes:
6442 f_create = node == pnode
6443 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
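# Note (editorial, hedged): f_create doubles as both force_create and
# force_open above, so the top-level device is only forced and opened on the
# primary node; on secondaries, creation is still triggered for device types
# whose CreateOnSecondary() returns True (e.g. DRBD8 and its LV children).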
6446 def _RemoveDisks(lu, instance, target_node=None):
6447 """Remove all disks for an instance.
6449 This abstracts away some work from `AddInstance()` and
6450 `RemoveInstance()`. Note that in case some of the devices couldn't
6451 be removed, the removal will continue with the other ones (compare
6452 with `_CreateDisks()`).
6454 @type lu: L{LogicalUnit}
6455 @param lu: the logical unit on whose behalf we execute
6456 @type instance: L{objects.Instance}
6457 @param instance: the instance whose disks we should remove
6458 @type target_node: string
6459 @param target_node: used to override the node on which to remove the disks
6461 @return: the success of the removal
6464 logging.info("Removing block devices for instance %s", instance.name)
6467 for device in instance.disks:
6469 edata = [(target_node, device)]
6471 edata = device.ComputeNodeTree(instance.primary_node)
6472 for node, disk in edata:
6473 lu.cfg.SetDiskID(disk, node)
6474 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6476 lu.LogWarning("Could not remove block device %s on node %s,"
6477 " continuing anyway: %s", device.iv_name, node, msg)
6480 if instance.disk_template == constants.DT_FILE:
6481 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6485 tgt = instance.primary_node
6486 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6488 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6489 file_storage_dir, instance.primary_node, result.fail_msg)
6495 def _ComputeDiskSize(disk_template, disks):
6496 """Compute disk size requirements in the volume group
6499 # Required free disk space as a function of disk and swap space
6501 constants.DT_DISKLESS: None,
6502 constants.DT_PLAIN: sum(d["size"] for d in disks),
6503 # 128 MB are added for drbd metadata for each disk
6504 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6505 constants.DT_FILE: None,
6508 if disk_template not in req_size_dict:
6509 raise errors.ProgrammerError("Disk template '%s' size requirement"
6510 " is unknown" % disk_template)
6512 return req_size_dict[disk_template]
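# Worked example (illustrative): for disk_template=constants.DT_DRBD8 and
# disks=[{"size": 1024}, {"size": 2048}] the required space is
# (1024 + 128) + (2048 + 128) = 3328 MB, while DT_DISKLESS and DT_FILE both
# yield None (no volume-group space is needed).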
6515 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6516 """Hypervisor parameter validation.
6518 This function abstracts the hypervisor parameter validation to be
6519 used in both instance create and instance modify.
6521 @type lu: L{LogicalUnit}
6522 @param lu: the logical unit for which we check
6523 @type nodenames: list
6524 @param nodenames: the list of nodes on which we should check
6525 @type hvname: string
6526 @param hvname: the name of the hypervisor we should use
6527 @type hvparams: dict
6528 @param hvparams: the parameters which we need to check
6529 @raise errors.OpPrereqError: if the parameters are not valid
6532 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6535 for node in nodenames:
6539 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6542 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6543 """OS parameters validation.
6545 @type lu: L{LogicalUnit}
6546 @param lu: the logical unit for which we check
6547 @type required: boolean
6548 @param required: whether the validation should fail if the OS is not
6550 @type nodenames: list
6551 @param nodenames: the list of nodes on which we should check
6552 @type osname: string
6553 @param osname: the name of the OS we should check
6554 @type osparams: dict
6555 @param osparams: the parameters which we need to check
6556 @raise errors.OpPrereqError: if the parameters are not valid
6559 result = lu.rpc.call_os_validate(required, nodenames, osname,
6560 [constants.OS_VALIDATE_PARAMETERS],
6562 for node, nres in result.items():
6563 # we don't check for offline cases since this should be run only
6564 # against the master node and/or an instance's nodes
6565 nres.Raise("OS Parameters validation failed on node %s" % node)
6566 if not nres.payload:
6567 lu.LogInfo("OS %s not found on node %s, validation skipped",
6571 class LUCreateInstance(LogicalUnit):
6572 """Create an instance.
6575 HPATH = "instance-add"
6576 HTYPE = constants.HTYPE_INSTANCE
6579 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6580 ("start", True, _TBool),
6581 ("wait_for_sync", True, _TBool),
6582 ("ip_check", True, _TBool),
6583 ("name_check", True, _TBool),
6584 ("disks", _NoDefault, _TListOf(_TDict)),
6585 ("nics", _NoDefault, _TListOf(_TDict)),
6586 ("hvparams", _EmptyDict, _TDict),
6587 ("beparams", _EmptyDict, _TDict),
6588 ("osparams", _EmptyDict, _TDict),
6589 ("no_install", None, _TMaybeBool),
6590 ("os_type", None, _TMaybeString),
6591 ("force_variant", False, _TBool),
6592 ("source_handshake", None, _TOr(_TList, _TNone)),
6593 ("source_x509_ca", None, _TMaybeString),
6594 ("source_instance_name", None, _TMaybeString),
6595 ("src_node", None, _TMaybeString),
6596 ("src_path", None, _TMaybeString),
6597 ("pnode", None, _TMaybeString),
6598 ("snode", None, _TMaybeString),
6599 ("iallocator", None, _TMaybeString),
6600 ("hypervisor", None, _TMaybeString),
6601 ("disk_template", _NoDefault, _CheckDiskTemplate),
6602 ("identify_defaults", False, _TBool),
6603 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6604 ("file_storage_dir", None, _TMaybeString),
6608 def CheckArguments(self):
6612 # do not require name_check to ease forward/backward compatibility
6614 if self.op.no_install and self.op.start:
6615 self.LogInfo("No-installation mode selected, disabling startup")
6616 self.op.start = False
6617 # validate/normalize the instance name
6618 self.op.instance_name = \
6619 netutils.Hostname.GetNormalizedName(self.op.instance_name)
6621 if self.op.ip_check and not self.op.name_check:
6622 # TODO: make the ip check more flexible and not depend on the name check
6623 raise errors.OpPrereqError("Cannot do ip check without a name check",
6626 # check nics' parameter names
6627 for nic in self.op.nics:
6628 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6630 # check disks. parameter names and consistent adopt/no-adopt strategy
6631 has_adopt = has_no_adopt = False
6632 for disk in self.op.disks:
6633 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6638 if has_adopt and has_no_adopt:
6639 raise errors.OpPrereqError("Either all disks are adopted or none is",
6642 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6643 raise errors.OpPrereqError("Disk adoption is not supported for the"
6644 " '%s' disk template" %
6645 self.op.disk_template,
6647 if self.op.iallocator is not None:
6648 raise errors.OpPrereqError("Disk adoption not allowed with an"
6649 " iallocator script", errors.ECODE_INVAL)
6650 if self.op.mode == constants.INSTANCE_IMPORT:
6651 raise errors.OpPrereqError("Disk adoption not allowed for"
6652 " instance import", errors.ECODE_INVAL)
6654 self.adopt_disks = has_adopt
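# Hedged example of the two accepted disk specifications (values are
# illustrative): either every disk is adopted or none is, e.g.
#
#   disks=[{"size": 1024, "mode": "rw"}]                    # normal creation
#   disks=[{"size": 1024, "mode": "rw", "adopt": "my-lv"}]  # adopt existing LV
#
# mixing the two forms, or adopting together with an iallocator or on import,
# is rejected by the checks above.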
6656 # instance name verification
6657 if self.op.name_check:
6658 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6659 self.op.instance_name = self.hostname1.name
6660 # used in CheckPrereq for ip ping check
6661 self.check_ip = self.hostname1.ip
6662 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6663 raise errors.OpPrereqError("Remote imports require names to be checked",
6666 self.check_ip = None
6668 # file storage checks
6669 if (self.op.file_driver and
6670 not self.op.file_driver in constants.FILE_DRIVER):
6671 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6672 self.op.file_driver, errors.ECODE_INVAL)
6674 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6675 raise errors.OpPrereqError("File storage directory path not absolute",
6678 ### Node/iallocator related checks
6679 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6681 if self.op.pnode is not None:
6682 if self.op.disk_template in constants.DTS_NET_MIRROR:
6683 if self.op.snode is None:
6684 raise errors.OpPrereqError("The networked disk templates need"
6685 " a mirror node", errors.ECODE_INVAL)
6687 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6689 self.op.snode = None
6691 self._cds = _GetClusterDomainSecret()
6693 if self.op.mode == constants.INSTANCE_IMPORT:
6694 # On import force_variant must be True, because if we forced it at
6695 # initial install, our only chance when importing it back is that it
6697 self.op.force_variant = True
6699 if self.op.no_install:
6700 self.LogInfo("No-installation mode has no effect during import")
6702 elif self.op.mode == constants.INSTANCE_CREATE:
6703 if self.op.os_type is None:
6704 raise errors.OpPrereqError("No guest OS specified",
6706 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6707 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6708 " installation" % self.op.os_type,
6710 if self.op.disk_template is None:
6711 raise errors.OpPrereqError("No disk template specified",
6714 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6715 # Check handshake to ensure both clusters have the same domain secret
6716 src_handshake = self.op.source_handshake
6717 if not src_handshake:
6718 raise errors.OpPrereqError("Missing source handshake",
6721 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6724 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6727 # Load and check source CA
6728 self.source_x509_ca_pem = self.op.source_x509_ca
6729 if not self.source_x509_ca_pem:
6730 raise errors.OpPrereqError("Missing source X509 CA",
6734 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6736 except OpenSSL.crypto.Error, err:
6737 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6738 (err, ), errors.ECODE_INVAL)
6740 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6741 if errcode is not None:
6742 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6745 self.source_x509_ca = cert
6747 src_instance_name = self.op.source_instance_name
6748 if not src_instance_name:
6749 raise errors.OpPrereqError("Missing source instance name",
6752 self.source_instance_name = \
6753 netutils.GetHostname(name=src_instance_name).name
6756 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6757 self.op.mode, errors.ECODE_INVAL)
6759 def ExpandNames(self):
6760 """ExpandNames for CreateInstance.
6762 Figure out the right locks for instance creation.
6765 self.needed_locks = {}
6767 instance_name = self.op.instance_name
6768 # this is just a preventive check, but someone might still add this
6769 # instance in the meantime, and creation will fail at lock-add time
6770 if instance_name in self.cfg.GetInstanceList():
6771 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6772 instance_name, errors.ECODE_EXISTS)
6774 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6776 if self.op.iallocator:
6777 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6779 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6780 nodelist = [self.op.pnode]
6781 if self.op.snode is not None:
6782 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6783 nodelist.append(self.op.snode)
6784 self.needed_locks[locking.LEVEL_NODE] = nodelist
6786 # in case of import lock the source node too
6787 if self.op.mode == constants.INSTANCE_IMPORT:
6788 src_node = self.op.src_node
6789 src_path = self.op.src_path
6791 if src_path is None:
6792 self.op.src_path = src_path = self.op.instance_name
6794 if src_node is None:
6795 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6796 self.op.src_node = None
6797 if os.path.isabs(src_path):
6798 raise errors.OpPrereqError("Importing an instance from an absolute"
6799 " path requires a source node option.",
6802 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6803 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6804 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6805 if not os.path.isabs(src_path):
6806 self.op.src_path = src_path = \
6807 utils.PathJoin(constants.EXPORT_DIR, src_path)
6809 def _RunAllocator(self):
6810 """Run the allocator based on input opcode.
6813 nics = [n.ToDict() for n in self.nics]
6814 ial = IAllocator(self.cfg, self.rpc,
6815 mode=constants.IALLOCATOR_MODE_ALLOC,
6816 name=self.op.instance_name,
6817 disk_template=self.op.disk_template,
6820 vcpus=self.be_full[constants.BE_VCPUS],
6821 mem_size=self.be_full[constants.BE_MEMORY],
6824 hypervisor=self.op.hypervisor,
6827 ial.Run(self.op.iallocator)
6830 raise errors.OpPrereqError("Can't compute nodes using"
6831 " iallocator '%s': %s" %
6832 (self.op.iallocator, ial.info),
6834 if len(ial.result) != ial.required_nodes:
6835 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6836 " of nodes (%s), required %s" %
6837 (self.op.iallocator, len(ial.result),
6838 ial.required_nodes), errors.ECODE_FAULT)
6839 self.op.pnode = ial.result[0]
6840 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6841 self.op.instance_name, self.op.iallocator,
6842 utils.CommaJoin(ial.result))
6843 if ial.required_nodes == 2:
6844 self.op.snode = ial.result[1]
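# Illustrative sketch of the allocator outcome handling above (node names are
# made up): for a DRBD8 instance ial.required_nodes is 2, so a result such as
# ["node1.example.com", "node2.example.com"] sets
# self.op.pnode = "node1.example.com" and self.op.snode = "node2.example.com";
# for non-mirrored templates only the primary node is taken from the result.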
6846 def BuildHooksEnv(self):
6849 This runs on master, primary and secondary nodes of the instance.
6853 "ADD_MODE": self.op.mode,
6855 if self.op.mode == constants.INSTANCE_IMPORT:
6856 env["SRC_NODE"] = self.op.src_node
6857 env["SRC_PATH"] = self.op.src_path
6858 env["SRC_IMAGES"] = self.src_images
6860 env.update(_BuildInstanceHookEnv(
6861 name=self.op.instance_name,
6862 primary_node=self.op.pnode,
6863 secondary_nodes=self.secondaries,
6864 status=self.op.start,
6865 os_type=self.op.os_type,
6866 memory=self.be_full[constants.BE_MEMORY],
6867 vcpus=self.be_full[constants.BE_VCPUS],
6868 nics=_NICListToTuple(self, self.nics),
6869 disk_template=self.op.disk_template,
6870 disks=[(d["size"], d["mode"]) for d in self.disks],
6873 hypervisor_name=self.op.hypervisor,
6876 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6880 def _ReadExportInfo(self):
6881 """Reads the export information from disk.
6883 It will override the opcode source node and path with the actual
6884 information, if these two were not specified before.
6886 @return: the export information
6889 assert self.op.mode == constants.INSTANCE_IMPORT
6891 src_node = self.op.src_node
6892 src_path = self.op.src_path
6894 if src_node is None:
6895 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6896 exp_list = self.rpc.call_export_list(locked_nodes)
6898 for node in exp_list:
6899 if exp_list[node].fail_msg:
6901 if src_path in exp_list[node].payload:
6903 self.op.src_node = src_node = node
6904 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6908 raise errors.OpPrereqError("No export found for relative path %s" %
6909 src_path, errors.ECODE_INVAL)
6911 _CheckNodeOnline(self, src_node)
6912 result = self.rpc.call_export_info(src_node, src_path)
6913 result.Raise("No export or invalid export found in dir %s" % src_path)
6915 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6916 if not export_info.has_section(constants.INISECT_EXP):
6917 raise errors.ProgrammerError("Corrupted export config",
6918 errors.ECODE_ENVIRON)
6920 ei_version = export_info.get(constants.INISECT_EXP, "version")
6921 if (int(ei_version) != constants.EXPORT_VERSION):
6922 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6923 (ei_version, constants.EXPORT_VERSION),
6924 errors.ECODE_ENVIRON)
6927 def _ReadExportParams(self, einfo):
6928 """Use export parameters as defaults.
6930 In case the opcode doesn't specify (as in override) some instance
6931 parameters, then try to use them from the export information, if
6935 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6937 if self.op.disk_template is None:
6938 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6939 self.op.disk_template = einfo.get(constants.INISECT_INS,
6942 raise errors.OpPrereqError("No disk template specified and the export"
6943 " is missing the disk_template information",
6946 if not self.op.disks:
6947 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6949 # TODO: import the disk iv_name too
6950 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6951 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6952 disks.append({"size": disk_sz})
6953 self.op.disks = disks
6955 raise errors.OpPrereqError("No disk info specified and the export"
6956 " is missing the disk information",
6959 if (not self.op.nics and
6960 einfo.has_option(constants.INISECT_INS, "nic_count")):
6962 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6964 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6965 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6970 if (self.op.hypervisor is None and
6971 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6972 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6973 if einfo.has_section(constants.INISECT_HYP):
6974 # use the export parameters but do not override the ones
6975 # specified by the user
6976 for name, value in einfo.items(constants.INISECT_HYP):
6977 if name not in self.op.hvparams:
6978 self.op.hvparams[name] = value
6980 if einfo.has_section(constants.INISECT_BEP):
6981 # use the parameters, without overriding
6982 for name, value in einfo.items(constants.INISECT_BEP):
6983 if name not in self.op.beparams:
6984 self.op.beparams[name] = value
6986 # try to read the parameters old style, from the main section
6987 for name in constants.BES_PARAMETERS:
6988 if (name not in self.op.beparams and
6989 einfo.has_option(constants.INISECT_INS, name)):
6990 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6992 if einfo.has_section(constants.INISECT_OSP):
6993 # use the parameters, without overriding
6994 for name, value in einfo.items(constants.INISECT_OSP):
6995 if name not in self.op.osparams:
6996 self.op.osparams[name] = value
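# Hedged example of how export data fills in unspecified opcode fields: given
# an export whose config contains (illustrative values)
#
#   [instance]
#   disk_template = drbd8
#   disk_count = 1
#   disk0_size = 1024
#
# an import submitted without disk template/disk options ends up with
# self.op.disk_template = "drbd8" and self.op.disks = [{"size": 1024}], while
# values explicitly given in the opcode always take precedence.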
6998 def _RevertToDefaults(self, cluster):
6999 """Revert the instance parameters to the default values.
7003 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7004 for name in self.op.hvparams.keys():
7005 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7006 del self.op.hvparams[name]
7008 be_defs = cluster.SimpleFillBE({})
7009 for name in self.op.beparams.keys():
7010 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7011 del self.op.beparams[name]
7013 nic_defs = cluster.SimpleFillNIC({})
7014 for nic in self.op.nics:
7015 for name in constants.NICS_PARAMETERS:
7016 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7019 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7020 for name in self.op.osparams.keys():
7021 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7022 del self.op.osparams[name]
7024 def CheckPrereq(self):
7025 """Check prerequisites.
7028 if self.op.mode == constants.INSTANCE_IMPORT:
7029 export_info = self._ReadExportInfo()
7030 self._ReadExportParams(export_info)
7032 _CheckDiskTemplate(self.op.disk_template)
7034 if (not self.cfg.GetVGName() and
7035 self.op.disk_template not in constants.DTS_NOT_LVM):
7036 raise errors.OpPrereqError("Cluster does not support lvm-based"
7037 " instances", errors.ECODE_STATE)
7039 if self.op.hypervisor is None:
7040 self.op.hypervisor = self.cfg.GetHypervisorType()
7042 cluster = self.cfg.GetClusterInfo()
7043 enabled_hvs = cluster.enabled_hypervisors
7044 if self.op.hypervisor not in enabled_hvs:
7045 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7046 " cluster (%s)" % (self.op.hypervisor,
7047 ",".join(enabled_hvs)),
7050 # check hypervisor parameter syntax (locally)
7051 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7052 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7054 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7055 hv_type.CheckParameterSyntax(filled_hvp)
7056 self.hv_full = filled_hvp
7057 # check that we don't specify global parameters on an instance
7058 _CheckGlobalHvParams(self.op.hvparams)
7060 # fill and remember the beparams dict
7061 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7062 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7064 # build os parameters
7065 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7067 # now that hvp/bep are in final format, let's reset to defaults,
7069 if self.op.identify_defaults:
7070 self._RevertToDefaults(cluster)
7074 for idx, nic in enumerate(self.op.nics):
7075 nic_mode_req = nic.get("mode", None)
7076 nic_mode = nic_mode_req
7077 if nic_mode is None:
7078 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7080 # in routed mode, for the first nic, the default ip is 'auto'
7081 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7082 default_ip_mode = constants.VALUE_AUTO
7084 default_ip_mode = constants.VALUE_NONE
7086 # ip validity checks
7087 ip = nic.get("ip", default_ip_mode)
7088 if ip is None or ip.lower() == constants.VALUE_NONE:
7090 elif ip.lower() == constants.VALUE_AUTO:
7091 if not self.op.name_check:
7092 raise errors.OpPrereqError("IP address set to auto but name checks"
7093 " have been skipped",
7095 nic_ip = self.hostname1.ip
7097 if not netutils.IPAddress.IsValid(ip):
7098 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7102 # TODO: check the ip address for uniqueness
7103 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7104 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7107 # MAC address verification
7108 mac = nic.get("mac", constants.VALUE_AUTO)
7109 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7110 mac = utils.NormalizeAndValidateMac(mac)
7113 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7114 except errors.ReservationError:
7115 raise errors.OpPrereqError("MAC address %s already in use"
7116 " in cluster" % mac,
7117 errors.ECODE_NOTUNIQUE)
7119 # bridge verification
7120 bridge = nic.get("bridge", None)
7121 link = nic.get("link", None)
7123 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7124 " at the same time", errors.ECODE_INVAL)
7125 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7126 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7133 nicparams[constants.NIC_MODE] = nic_mode_req
7135 nicparams[constants.NIC_LINK] = link
7137 check_params = cluster.SimpleFillNIC(nicparams)
7138 objects.NIC.CheckParameterSyntax(check_params)
7139 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7141 # disk checks/pre-build
7143 for disk in self.op.disks:
7144 mode = disk.get("mode", constants.DISK_RDWR)
7145 if mode not in constants.DISK_ACCESS_SET:
7146 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7147 mode, errors.ECODE_INVAL)
7148 size = disk.get("size", None)
7150 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7153 except (TypeError, ValueError):
7154 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7156 new_disk = {"size": size, "mode": mode}
7158 new_disk["adopt"] = disk["adopt"]
7159 self.disks.append(new_disk)
7161 if self.op.mode == constants.INSTANCE_IMPORT:
7163 # Check that the new instance doesn't have less disks than the export
7164 instance_disks = len(self.disks)
7165 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7166 if instance_disks < export_disks:
7167 raise errors.OpPrereqError("Not enough disks to import."
7168 " (instance: %d, export: %d)" %
7169 (instance_disks, export_disks),
7173 for idx in range(export_disks):
7174 option = 'disk%d_dump' % idx
7175 if export_info.has_option(constants.INISECT_INS, option):
7176 # FIXME: are the old os-es, disk sizes, etc. useful?
7177 export_name = export_info.get(constants.INISECT_INS, option)
7178 image = utils.PathJoin(self.op.src_path, export_name)
7179 disk_images.append(image)
7181 disk_images.append(False)
7183 self.src_images = disk_images
7185 old_name = export_info.get(constants.INISECT_INS, 'name')
7187 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7188 except (TypeError, ValueError), err:
7189 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7190 " an integer: %s" % str(err),
7192 if self.op.instance_name == old_name:
7193 for idx, nic in enumerate(self.nics):
7194 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7195 nic_mac_ini = 'nic%d_mac' % idx
7196 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7198 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7200 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7201 if self.op.ip_check:
7202 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7203 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7204 (self.check_ip, self.op.instance_name),
7205 errors.ECODE_NOTUNIQUE)
7207 #### mac address generation
7208 # By generating the mac address here, both the allocator and the hooks get
7209 # the real final mac address rather than the 'auto' or 'generate' value.
7210 # There is a race condition between the generation and the instance object
7211 # creation, which means that we know the mac is valid now, but we're not
7212 # sure it will be when we actually add the instance. If things go bad
7213 # adding the instance will abort because of a duplicate mac, and the
7214 # creation job will fail.
7215 for nic in self.nics:
7216 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7217 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7221 if self.op.iallocator is not None:
7222 self._RunAllocator()
7224 #### node related checks
7226 # check primary node
7227 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7228 assert self.pnode is not None, \
7229 "Cannot retrieve locked node %s" % self.op.pnode
7231 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7232 pnode.name, errors.ECODE_STATE)
7234 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7235 pnode.name, errors.ECODE_STATE)
7237 self.secondaries = []
7239 # mirror node verification
7240 if self.op.disk_template in constants.DTS_NET_MIRROR:
7241 if self.op.snode == pnode.name:
7242 raise errors.OpPrereqError("The secondary node cannot be the"
7243 " primary node.", errors.ECODE_INVAL)
7244 _CheckNodeOnline(self, self.op.snode)
7245 _CheckNodeNotDrained(self, self.op.snode)
7246 self.secondaries.append(self.op.snode)
7248 nodenames = [pnode.name] + self.secondaries
7250 req_size = _ComputeDiskSize(self.op.disk_template,
7253 # Check lv size requirements, if not adopting
7254 if req_size is not None and not self.adopt_disks:
7255 _CheckNodesFreeDisk(self, nodenames, req_size)
7257 if self.adopt_disks: # instead, we must check the adoption data
7258 all_lvs = set([i["adopt"] for i in self.disks])
7259 if len(all_lvs) != len(self.disks):
7260 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7262 for lv_name in all_lvs:
7264 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7265 except errors.ReservationError:
7266 raise errors.OpPrereqError("LV named %s used by another instance" %
7267 lv_name, errors.ECODE_NOTUNIQUE)
7269 node_lvs = self.rpc.call_lv_list([pnode.name],
7270 self.cfg.GetVGName())[pnode.name]
7271 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7272 node_lvs = node_lvs.payload
7273 delta = all_lvs.difference(node_lvs.keys())
7275 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7276 utils.CommaJoin(delta),
7278 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7280 raise errors.OpPrereqError("Online logical volumes found, cannot"
7281 " adopt: %s" % utils.CommaJoin(online_lvs),
7283 # update the size of disk based on what is found
7284 for dsk in self.disks:
7285 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7287 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7289 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7290 # check OS parameters (remotely)
7291 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7293 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7295 # memory check on primary node
7297 _CheckNodeFreeMemory(self, self.pnode.name,
7298 "creating instance %s" % self.op.instance_name,
7299 self.be_full[constants.BE_MEMORY],
7302 self.dry_run_result = list(nodenames)
7304 def Exec(self, feedback_fn):
7305 """Create and add the instance to the cluster.
7308 instance = self.op.instance_name
7309 pnode_name = self.pnode.name
7311 ht_kind = self.op.hypervisor
7312 if ht_kind in constants.HTS_REQ_PORT:
7313 network_port = self.cfg.AllocatePort()
7317 if constants.ENABLE_FILE_STORAGE:
7318 # this is needed because os.path.join does not accept None arguments
7319 if self.op.file_storage_dir is None:
7320 string_file_storage_dir = ""
7322 string_file_storage_dir = self.op.file_storage_dir
7324 # build the full file storage dir path
7325 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7326 string_file_storage_dir, instance)
7328 file_storage_dir = ""
7330 disks = _GenerateDiskTemplate(self,
7331 self.op.disk_template,
7332 instance, pnode_name,
7336 self.op.file_driver,
7339 iobj = objects.Instance(name=instance, os=self.op.os_type,
7340 primary_node=pnode_name,
7341 nics=self.nics, disks=disks,
7342 disk_template=self.op.disk_template,
7344 network_port=network_port,
7345 beparams=self.op.beparams,
7346 hvparams=self.op.hvparams,
7347 hypervisor=self.op.hypervisor,
7348 osparams=self.op.osparams,
7351 if self.adopt_disks:
7352 # rename LVs to the newly-generated names; we need to construct
7353 # 'fake' LV disks with the old data, plus the new unique_id
7354 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7356 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7357 rename_to.append(t_dsk.logical_id)
7358 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7359 self.cfg.SetDiskID(t_dsk, pnode_name)
7360 result = self.rpc.call_blockdev_rename(pnode_name,
7361 zip(tmp_disks, rename_to))
7362 result.Raise("Failed to rename adopted LVs")
7364 feedback_fn("* creating instance disks...")
7366 _CreateDisks(self, iobj)
7367 except errors.OpExecError:
7368 self.LogWarning("Device creation failed, reverting...")
7370 _RemoveDisks(self, iobj)
7372 self.cfg.ReleaseDRBDMinors(instance)
7375 feedback_fn("adding instance %s to cluster config" % instance)
7377 self.cfg.AddInstance(iobj, self.proc.GetECId())
7379 # Declare that we don't want to remove the instance lock anymore, as we've
7380 # added the instance to the config
7381 del self.remove_locks[locking.LEVEL_INSTANCE]
7382 # Unlock all the nodes
7383 if self.op.mode == constants.INSTANCE_IMPORT:
7384 nodes_keep = [self.op.src_node]
7385 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7386 if node != self.op.src_node]
7387 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7388 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7390 self.context.glm.release(locking.LEVEL_NODE)
7391 del self.acquired_locks[locking.LEVEL_NODE]
7393 if self.op.wait_for_sync:
7394 disk_abort = not _WaitForSync(self, iobj)
7395 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7396 # make sure the disks are not degraded (still sync-ing is ok)
7398 feedback_fn("* checking mirrors status")
7399 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7404 _RemoveDisks(self, iobj)
7405 self.cfg.RemoveInstance(iobj.name)
7406 # Make sure the instance lock gets removed
7407 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7408 raise errors.OpExecError("There are some degraded disks for"
7411 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7412 if self.op.mode == constants.INSTANCE_CREATE:
7413 if not self.op.no_install:
7414 feedback_fn("* running the instance OS create scripts...")
7415 # FIXME: pass debug option from opcode to backend
7416 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7417 self.op.debug_level)
7418 result.Raise("Could not add os for instance %s"
7419 " on node %s" % (instance, pnode_name))
7421 elif self.op.mode == constants.INSTANCE_IMPORT:
7422 feedback_fn("* running the instance OS import scripts...")
7426 for idx, image in enumerate(self.src_images):
7430 # FIXME: pass debug option from opcode to backend
7431 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7432 constants.IEIO_FILE, (image, ),
7433 constants.IEIO_SCRIPT,
7434 (iobj.disks[idx], idx),
7436 transfers.append(dt)
7439 masterd.instance.TransferInstanceData(self, feedback_fn,
7440 self.op.src_node, pnode_name,
7441 self.pnode.secondary_ip,
7443 if not compat.all(import_result):
7444 self.LogWarning("Some disks for instance %s on node %s were not"
7445 " imported successfully" % (instance, pnode_name))
7447 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7448 feedback_fn("* preparing remote import...")
7449 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7450 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7452 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7453 self.source_x509_ca,
7454 self._cds, timeouts)
7455 if not compat.all(disk_results):
7456 # TODO: Should the instance still be started, even if some disks
7457 # failed to import (valid for local imports, too)?
7458 self.LogWarning("Some disks for instance %s on node %s were not"
7459 " imported successfully" % (instance, pnode_name))
7461 # Run rename script on newly imported instance
7462 assert iobj.name == instance
7463 feedback_fn("Running rename script for %s" % instance)
7464 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7465 self.source_instance_name,
7466 self.op.debug_level)
7468 self.LogWarning("Failed to run rename script for %s on node"
7469 " %s: %s" % (instance, pnode_name, result.fail_msg))
7472 # also checked in the prereq part
7473 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7477 iobj.admin_up = True
7478 self.cfg.Update(iobj, feedback_fn)
7479 logging.info("Starting instance %s on node %s", instance, pnode_name)
7480 feedback_fn("* starting instance...")
7481 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7482 result.Raise("Could not start instance")
7484 return list(iobj.all_nodes)
7487 class LUConnectConsole(NoHooksLU):
7488 """Connect to an instance's console.
7490 This is somewhat special in that it returns the command line that
7491 you need to run on the master node in order to connect to the
7500 def ExpandNames(self):
7501 self._ExpandAndLockInstance()
7503 def CheckPrereq(self):
7504 """Check prerequisites.
7506 This checks that the instance is in the cluster.
7509 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7510 assert self.instance is not None, \
7511 "Cannot retrieve locked instance %s" % self.op.instance_name
7512 _CheckNodeOnline(self, self.instance.primary_node)
7514 def Exec(self, feedback_fn):
7515 """Connect to the console of an instance
7518 instance = self.instance
7519 node = instance.primary_node
7521 node_insts = self.rpc.call_instance_list([node],
7522 [instance.hypervisor])[node]
7523 node_insts.Raise("Can't get node information from %s" % node)
7525 if instance.name not in node_insts.payload:
7526 raise errors.OpExecError("Instance %s is not running." % instance.name)
7528 logging.debug("Connecting to console of %s on %s", instance.name, node)
7530 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7531 cluster = self.cfg.GetClusterInfo()
7532 # beparams and hvparams are passed separately, to avoid editing the
7533 # instance and then saving the defaults in the instance itself.
7534 hvparams = cluster.FillHV(instance)
7535 beparams = cluster.FillBE(instance)
7536 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7539 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
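# Illustrative note (editorial): the opcode result is the full command line the
# client should exec on the master node, e.g. something along the lines of an
# "ssh -t root@node1 xm console inst1" invocation for a Xen instance (made-up
# values); the actual console command comes from the hypervisor's
# GetShellCommandForConsole().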
7542 class LUReplaceDisks(LogicalUnit):
7543 """Replace the disks of an instance.
7546 HPATH = "mirrors-replace"
7547 HTYPE = constants.HTYPE_INSTANCE
7550 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7551 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7552 ("remote_node", None, _TMaybeString),
7553 ("iallocator", None, _TMaybeString),
7554 ("early_release", False, _TBool),
7558 def CheckArguments(self):
7559 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7562 def ExpandNames(self):
7563 self._ExpandAndLockInstance()
7565 if self.op.iallocator is not None:
7566 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7568 elif self.op.remote_node is not None:
7569 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7570 self.op.remote_node = remote_node
7572 # Warning: do not remove the locking of the new secondary here
7573 # unless DRBD8.AddChildren is changed to work in parallel;
7574 # currently it doesn't since parallel invocations of
7575 # FindUnusedMinor will conflict
7576 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7577 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7580 self.needed_locks[locking.LEVEL_NODE] = []
7581 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7583 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7584 self.op.iallocator, self.op.remote_node,
7585 self.op.disks, False, self.op.early_release)
7587 self.tasklets = [self.replacer]
7589 def DeclareLocks(self, level):
7590 # If we're not already locking all nodes in the set we have to declare the
7591 # instance's primary/secondary nodes.
7592 if (level == locking.LEVEL_NODE and
7593 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7594 self._LockInstancesNodes()
7596 def BuildHooksEnv(self):
7599 This runs on the master, the primary and all the secondaries.
7602 instance = self.replacer.instance
7604 "MODE": self.op.mode,
7605 "NEW_SECONDARY": self.op.remote_node,
7606 "OLD_SECONDARY": instance.secondary_nodes[0],
7608 env.update(_BuildInstanceHookEnvByObject(self, instance))
7610 self.cfg.GetMasterNode(),
7611 instance.primary_node,
7613 if self.op.remote_node is not None:
7614 nl.append(self.op.remote_node)
7618 class TLReplaceDisks(Tasklet):
7619 """Replaces disks for an instance.
7621 Note: Locking is not within the scope of this class.
7624 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7625 disks, delay_iallocator, early_release):
7626 """Initializes this class.
7629 Tasklet.__init__(self, lu)
7632 self.instance_name = instance_name
7634 self.iallocator_name = iallocator_name
7635 self.remote_node = remote_node
7637 self.delay_iallocator = delay_iallocator
7638 self.early_release = early_release
7641 self.instance = None
7642 self.new_node = None
7643 self.target_node = None
7644 self.other_node = None
7645 self.remote_node_info = None
7646 self.node_secondary_ip = None
7649 def CheckArguments(mode, remote_node, iallocator):
7650 """Helper function for users of this class.
7653 # check for valid parameter combination
7654 if mode == constants.REPLACE_DISK_CHG:
7655 if remote_node is None and iallocator is None:
7656 raise errors.OpPrereqError("When changing the secondary either an"
7657 " iallocator script must be used or the"
7658 " new node given", errors.ECODE_INVAL)
7660 if remote_node is not None and iallocator is not None:
7661 raise errors.OpPrereqError("Give either the iallocator or the new"
7662 " secondary, not both", errors.ECODE_INVAL)
7664 elif remote_node is not None or iallocator is not None:
7665 # Not replacing the secondary
7666 raise errors.OpPrereqError("The iallocator and new node options can"
7667 " only be used when changing the"
7668 " secondary node", errors.ECODE_INVAL)
7671 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7672 """Compute a new secondary node using an IAllocator.
7675 ial = IAllocator(lu.cfg, lu.rpc,
7676 mode=constants.IALLOCATOR_MODE_RELOC,
7678 relocate_from=relocate_from)
7680 ial.Run(iallocator_name)
7683 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7684 " %s" % (iallocator_name, ial.info),
7687 if len(ial.result) != ial.required_nodes:
7688 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7689 " of nodes (%s), required %s" %
7691 len(ial.result), ial.required_nodes),
7694 remote_node_name = ial.result[0]
7696 lu.LogInfo("Selected new secondary for instance '%s': %s",
7697 instance_name, remote_node_name)
7699 return remote_node_name
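# A minimal usage sketch for the allocator helper above (names are
# hypothetical; "hail" is assumed to be an installed iallocator script):
#
#   new_secondary = TLReplaceDisks._RunAllocator(lu, "hail",
#                                                "instance1.example.com",
#                                                ["node3.example.com"])
#
# On success this logs and returns the node name chosen as the new secondary;
# otherwise it raises OpPrereqError.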
7701 def _FindFaultyDisks(self, node_name):
7702 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7705 def CheckPrereq(self):
7706 """Check prerequisites.
7708 This checks that the instance is in the cluster.
7711 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7712 assert instance is not None, \
7713 "Cannot retrieve locked instance %s" % self.instance_name
7715 if instance.disk_template != constants.DT_DRBD8:
7716 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7717 " instances", errors.ECODE_INVAL)
7719 if len(instance.secondary_nodes) != 1:
7720 raise errors.OpPrereqError("The instance has a strange layout,"
7721 " expected one secondary but found %d" %
7722 len(instance.secondary_nodes),
7725 if not self.delay_iallocator:
7726 self._CheckPrereq2()
7728 def _CheckPrereq2(self):
7729 """Check prerequisites, second part.
7731 This function should always be part of CheckPrereq. It was separated and is
7732 now called from Exec because during node evacuation iallocator was only
7733 called with an unmodified cluster model, not taking planned changes into account.
7737 instance = self.instance
7738 secondary_node = instance.secondary_nodes[0]
7740 if self.iallocator_name is None:
7741 remote_node = self.remote_node
7743 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7744 instance.name, instance.secondary_nodes)
7746 if remote_node is not None:
7747 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7748 assert self.remote_node_info is not None, \
7749 "Cannot retrieve locked node %s" % remote_node
7751 self.remote_node_info = None
7753 if remote_node == self.instance.primary_node:
7754 raise errors.OpPrereqError("The specified node is the primary node of"
7755 " the instance.", errors.ECODE_INVAL)
7757 if remote_node == secondary_node:
7758 raise errors.OpPrereqError("The specified node is already the"
7759 " secondary node of the instance.",
7762 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7763 constants.REPLACE_DISK_CHG):
7764 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7767 if self.mode == constants.REPLACE_DISK_AUTO:
7768 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7769 faulty_secondary = self._FindFaultyDisks(secondary_node)
7771 if faulty_primary and faulty_secondary:
7772 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7773 " one node and can not be repaired"
7774 " automatically" % self.instance_name,
7778 self.disks = faulty_primary
7779 self.target_node = instance.primary_node
7780 self.other_node = secondary_node
7781 check_nodes = [self.target_node, self.other_node]
7782 elif faulty_secondary:
7783 self.disks = faulty_secondary
7784 self.target_node = secondary_node
7785 self.other_node = instance.primary_node
7786 check_nodes = [self.target_node, self.other_node]
7792 # Non-automatic modes
7793 if self.mode == constants.REPLACE_DISK_PRI:
7794 self.target_node = instance.primary_node
7795 self.other_node = secondary_node
7796 check_nodes = [self.target_node, self.other_node]
7798 elif self.mode == constants.REPLACE_DISK_SEC:
7799 self.target_node = secondary_node
7800 self.other_node = instance.primary_node
7801 check_nodes = [self.target_node, self.other_node]
7803 elif self.mode == constants.REPLACE_DISK_CHG:
7804 self.new_node = remote_node
7805 self.other_node = instance.primary_node
7806 self.target_node = secondary_node
7807 check_nodes = [self.new_node, self.other_node]
7809 _CheckNodeNotDrained(self.lu, remote_node)
7811 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7812 assert old_node_info is not None
7813 if old_node_info.offline and not self.early_release:
7814 # doesn't make sense to delay the release
7815 self.early_release = True
7816 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7817 " early-release mode", secondary_node)
7820 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7823 # If not specified all disks should be replaced
7825 self.disks = range(len(self.instance.disks))
7827 for node in check_nodes:
7828 _CheckNodeOnline(self.lu, node)
7830 # Check whether disks are valid
7831 for disk_idx in self.disks:
7832 instance.FindDisk(disk_idx)
7834 # Get secondary node IP addresses
7837 for node_name in [self.target_node, self.other_node, self.new_node]:
7838 if node_name is not None:
7839 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7841 self.node_secondary_ip = node_2nd_ip
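# The resulting mapping is a plain dict keyed by node name, for example
# (hypothetical addresses):
#
#   self.node_secondary_ip = {
#     "node1.example.com": "192.0.2.1",   # target node
#     "node2.example.com": "192.0.2.2",   # other node
#   }
#
# It is later passed to the drbd disconnect/attach RPCs.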
7843 def Exec(self, feedback_fn):
7844 """Execute disk replacement.
7846 This dispatches the disk replacement to the appropriate handler.
7849 if self.delay_iallocator:
7850 self._CheckPrereq2()
7853 feedback_fn("No disks need replacement")
7856 feedback_fn("Replacing disk(s) %s for %s" %
7857 (utils.CommaJoin(self.disks), self.instance.name))
7859 activate_disks = (not self.instance.admin_up)
7861 # Activate the instance disks if we're replacing them on a down instance
7863 _StartInstanceDisks(self.lu, self.instance, True)
7866 # Should we replace the secondary node?
7867 if self.new_node is not None:
7868 fn = self._ExecDrbd8Secondary
7870 fn = self._ExecDrbd8DiskOnly
7872 return fn(feedback_fn)
7875 # Deactivate the instance disks if we're replacing them on a down instance
7878 _SafeShutdownInstanceDisks(self.lu, self.instance)
7880 def _CheckVolumeGroup(self, nodes):
7881 self.lu.LogInfo("Checking volume groups")
7883 vgname = self.cfg.GetVGName()
7885 # Make sure volume group exists on all involved nodes
7886 results = self.rpc.call_vg_list(nodes)
7888 raise errors.OpExecError("Can't list volume groups on the nodes")
7892 res.Raise("Error checking node %s" % node)
7893 if vgname not in res.payload:
7894 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7897 def _CheckDisksExistence(self, nodes):
7898 # Check disk existence
7899 for idx, dev in enumerate(self.instance.disks):
7900 if idx not in self.disks:
7904 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7905 self.cfg.SetDiskID(dev, node)
7907 result = self.rpc.call_blockdev_find(node, dev)
7909 msg = result.fail_msg
7910 if msg or not result.payload:
7912 msg = "disk not found"
7913 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7916 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7917 for idx, dev in enumerate(self.instance.disks):
7918 if idx not in self.disks:
7921 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7924 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7926 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7927 " replace disks for instance %s" %
7928 (node_name, self.instance.name))
7930 def _CreateNewStorage(self, node_name):
7931 vgname = self.cfg.GetVGName()
7934 for idx, dev in enumerate(self.instance.disks):
7935 if idx not in self.disks:
7938 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7940 self.cfg.SetDiskID(dev, node_name)
7942 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7943 names = _GenerateUniqueNames(self.lu, lv_names)
7945 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7946 logical_id=(vgname, names[0]))
7947 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7948 logical_id=(vgname, names[1]))
7950 new_lvs = [lv_data, lv_meta]
7951 old_lvs = dev.children
7952 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7954 # we pass force_create=True to force the LVM creation
7955 for new_lv in new_lvs:
7956 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7957 _GetInstanceInfoText(self.instance), False)
7961 def _CheckDevices(self, node_name, iv_names):
7962 for name, (dev, _, _) in iv_names.iteritems():
7963 self.cfg.SetDiskID(dev, node_name)
7965 result = self.rpc.call_blockdev_find(node_name, dev)
7967 msg = result.fail_msg
7968 if msg or not result.payload:
7970 msg = "disk not found"
7971 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7974 if result.payload.is_degraded:
7975 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7977 def _RemoveOldStorage(self, node_name, iv_names):
7978 for name, (_, old_lvs, _) in iv_names.iteritems():
7979 self.lu.LogInfo("Remove logical volumes for %s" % name)
7982 self.cfg.SetDiskID(lv, node_name)
7984 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7986 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7987 hint="remove unused LVs manually")
7989 def _ReleaseNodeLock(self, node_name):
7990 """Releases the lock for a given node."""
7991 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7993 def _ExecDrbd8DiskOnly(self, feedback_fn):
7994 """Replace a disk on the primary or secondary for DRBD 8.
7996 The algorithm for replace is quite complicated:
7998 1. for each disk to be replaced:
8000 1. create new LVs on the target node with unique names
8001 1. detach old LVs from the drbd device
8002 1. rename old LVs to name_replaced.<time_t>
8003 1. rename new LVs to old LVs
8004 1. attach the new LVs (with the old names now) to the drbd device
8006 1. wait for sync across all devices
8008 1. for each modified disk:
8010 1. remove old LVs (which have the name name_replaced.<time_t>)
8012 Failures are not very well handled.
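# A condensed, purely illustrative sketch of the per-disk rename dance
# implemented below (LV names are hypothetical):
#
#   temp_suffix = int(time.time())
#   # the old LV is moved out of the way under a uniquified name...
#   ("xenvg", "old_data") -> ("xenvg", "old_data_replaced-%s" % temp_suffix)
#   # ...and the freshly created LV is renamed to the old LV's name,
#   ("xenvg", "new_data") -> ("xenvg", "old_data")
#   # so that it can be re-attached to the drbd device under the expected name.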
8017 # Step: check device activation
8018 self.lu.LogStep(1, steps_total, "Check device existence")
8019 self._CheckDisksExistence([self.other_node, self.target_node])
8020 self._CheckVolumeGroup([self.target_node, self.other_node])
8022 # Step: check other node consistency
8023 self.lu.LogStep(2, steps_total, "Check peer consistency")
8024 self._CheckDisksConsistency(self.other_node,
8025 self.other_node == self.instance.primary_node,
8028 # Step: create new storage
8029 self.lu.LogStep(3, steps_total, "Allocate new storage")
8030 iv_names = self._CreateNewStorage(self.target_node)
8032 # Step: for each lv, detach+rename*2+attach
8033 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8034 for dev, old_lvs, new_lvs in iv_names.itervalues():
8035 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8037 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8039 result.Raise("Can't detach drbd from local storage on node"
8040 " %s for device %s" % (self.target_node, dev.iv_name))
8042 #cfg.Update(instance)
8044 # ok, we created the new LVs, so now we know we have the needed
8045 # storage; as such, we proceed on the target node to rename
8046 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8047 # using the assumption that logical_id == physical_id (which in
8048 # turn is the unique_id on that node)
8050 # FIXME(iustin): use a better name for the replaced LVs
8051 temp_suffix = int(time.time())
8052 ren_fn = lambda d, suff: (d.physical_id[0],
8053 d.physical_id[1] + "_replaced-%s" % suff)
8055 # Build the rename list based on what LVs exist on the node
8056 rename_old_to_new = []
8057 for to_ren in old_lvs:
8058 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8059 if not result.fail_msg and result.payload:
8061 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8063 self.lu.LogInfo("Renaming the old LVs on the target node")
8064 result = self.rpc.call_blockdev_rename(self.target_node,
8066 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8068 # Now we rename the new LVs to the old LVs
8069 self.lu.LogInfo("Renaming the new LVs on the target node")
8070 rename_new_to_old = [(new, old.physical_id)
8071 for old, new in zip(old_lvs, new_lvs)]
8072 result = self.rpc.call_blockdev_rename(self.target_node,
8074 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8076 for old, new in zip(old_lvs, new_lvs):
8077 new.logical_id = old.logical_id
8078 self.cfg.SetDiskID(new, self.target_node)
8080 for disk in old_lvs:
8081 disk.logical_id = ren_fn(disk, temp_suffix)
8082 self.cfg.SetDiskID(disk, self.target_node)
8084 # Now that the new lvs have the old name, we can add them to the device
8085 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8086 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8088 msg = result.fail_msg
8090 for new_lv in new_lvs:
8091 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8094 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8095 hint=("cleanup manually the unused logical"
8097 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8099 dev.children = new_lvs
8101 self.cfg.Update(self.instance, feedback_fn)
8104 if self.early_release:
8105 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8107 self._RemoveOldStorage(self.target_node, iv_names)
8108 # WARNING: we release both node locks here, do not do other RPCs
8109 # than WaitForSync to the primary node
8110 self._ReleaseNodeLock([self.target_node, self.other_node])
8113 # This can fail as the old devices are degraded and _WaitForSync
8114 # does a combined result over all disks, so we don't check its return value
8115 self.lu.LogStep(cstep, steps_total, "Sync devices")
8117 _WaitForSync(self.lu, self.instance)
8119 # Check all devices manually
8120 self._CheckDevices(self.instance.primary_node, iv_names)
8122 # Step: remove old storage
8123 if not self.early_release:
8124 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8126 self._RemoveOldStorage(self.target_node, iv_names)
8128 def _ExecDrbd8Secondary(self, feedback_fn):
8129 """Replace the secondary node for DRBD 8.
8131 The algorithm for replace is quite complicated:
8132 - for all disks of the instance:
8133 - create new LVs on the new node with same names
8134 - shutdown the drbd device on the old secondary
8135 - disconnect the drbd network on the primary
8136 - create the drbd device on the new secondary
8137 - network attach the drbd on the primary, using an artifice:
8138 the drbd code for Attach() will connect to the network if it
8139 finds a device which is connected to the good local disks but not network enabled
8141 - wait for sync across all devices
8142 - remove all disks from the old secondary
8144 Failures are not very well handled.
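# Sketch of the drbd logical_id rewrite performed in step 4 below (values are
# hypothetical): given an existing disk with
#
#   dev.logical_id = (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret)
#
# the code keeps the primary's minor (p_minor), allocates new_minor on the new
# node, and builds two ids:
#
#   new_alone_id = (primary, new_node, None,   p_minor, new_minor, o_secret)
#   new_net_id   = (primary, new_node, o_port, p_minor, new_minor, o_secret)
#
# The first (port-less) id is used to create the device standalone; the second
# is stored in the configuration and used for the later network attach.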
8149 # Step: check device activation
8150 self.lu.LogStep(1, steps_total, "Check device existence")
8151 self._CheckDisksExistence([self.instance.primary_node])
8152 self._CheckVolumeGroup([self.instance.primary_node])
8154 # Step: check other node consistency
8155 self.lu.LogStep(2, steps_total, "Check peer consistency")
8156 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8158 # Step: create new storage
8159 self.lu.LogStep(3, steps_total, "Allocate new storage")
8160 for idx, dev in enumerate(self.instance.disks):
8161 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8162 (self.new_node, idx))
8163 # we pass force_create=True to force LVM creation
8164 for new_lv in dev.children:
8165 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8166 _GetInstanceInfoText(self.instance), False)
8168 # Step 4: drbd minors and drbd setup changes
8169 # after this, we must manually remove the drbd minors on both the
8170 # error and the success paths
8171 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8172 minors = self.cfg.AllocateDRBDMinor([self.new_node
8173 for dev in self.instance.disks],
8175 logging.debug("Allocated minors %r", minors)
8178 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8179 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
8180 (self.new_node, idx))
8181 # create new devices on new_node; note that we create two IDs:
8182 # one without port, so the drbd will be activated without
8183 # networking information on the new node at this stage, and one
8184 # with network, for the latter activation in step 4
8185 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8186 if self.instance.primary_node == o_node1:
8189 assert self.instance.primary_node == o_node2, "Three-node instance?"
8192 new_alone_id = (self.instance.primary_node, self.new_node, None,
8193 p_minor, new_minor, o_secret)
8194 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8195 p_minor, new_minor, o_secret)
8197 iv_names[idx] = (dev, dev.children, new_net_id)
8198 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8200 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8201 logical_id=new_alone_id,
8202 children=dev.children,
8205 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8206 _GetInstanceInfoText(self.instance), False)
8207 except errors.GenericError:
8208 self.cfg.ReleaseDRBDMinors(self.instance.name)
8211 # We have new devices, shutdown the drbd on the old secondary
8212 for idx, dev in enumerate(self.instance.disks):
8213 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8214 self.cfg.SetDiskID(dev, self.target_node)
8215 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8217 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8218 "node: %s" % (idx, msg),
8219 hint=("Please cleanup this device manually as"
8220 " soon as possible"))
8222 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8223 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8224 self.node_secondary_ip,
8225 self.instance.disks)\
8226 [self.instance.primary_node]
8228 msg = result.fail_msg
8230 # detaches didn't succeed (unlikely)
8231 self.cfg.ReleaseDRBDMinors(self.instance.name)
8232 raise errors.OpExecError("Can't detach the disks from the network on"
8233 " old node: %s" % (msg,))
8235 # if we managed to detach at least one, we update all the disks of
8236 # the instance to point to the new secondary
8237 self.lu.LogInfo("Updating instance configuration")
8238 for dev, _, new_logical_id in iv_names.itervalues():
8239 dev.logical_id = new_logical_id
8240 self.cfg.SetDiskID(dev, self.instance.primary_node)
8242 self.cfg.Update(self.instance, feedback_fn)
8244 # and now perform the drbd attach
8245 self.lu.LogInfo("Attaching primary drbds to new secondary"
8246 " (standalone => connected)")
8247 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8249 self.node_secondary_ip,
8250 self.instance.disks,
8253 for to_node, to_result in result.items():
8254 msg = to_result.fail_msg
8256 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8258 hint=("please do a gnt-instance info to see the"
8259 " status of disks"))
8261 if self.early_release:
8262 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8264 self._RemoveOldStorage(self.target_node, iv_names)
8265 # WARNING: we release all node locks here, do not do other RPCs
8266 # than WaitForSync to the primary node
8267 self._ReleaseNodeLock([self.instance.primary_node,
8272 # This can fail as the old devices are degraded and _WaitForSync
8273 # does a combined result over all disks, so we don't check its return value
8274 self.lu.LogStep(cstep, steps_total, "Sync devices")
8276 _WaitForSync(self.lu, self.instance)
8278 # Check all devices manually
8279 self._CheckDevices(self.instance.primary_node, iv_names)
8281 # Step: remove old storage
8282 if not self.early_release:
8283 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8284 self._RemoveOldStorage(self.target_node, iv_names)
8287 class LURepairNodeStorage(NoHooksLU):
8288 """Repairs the volume group on a node.
8293 ("storage_type", _NoDefault, _CheckStorageType),
8294 ("name", _NoDefault, _TNonEmptyString),
8295 ("ignore_consistency", False, _TBool),
8299 def CheckArguments(self):
8300 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8302 storage_type = self.op.storage_type
8304 if (constants.SO_FIX_CONSISTENCY not in
8305 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8306 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8307 " repaired" % storage_type,
8310 def ExpandNames(self):
8311 self.needed_locks = {
8312 locking.LEVEL_NODE: [self.op.node_name],
8315 def _CheckFaultyDisks(self, instance, node_name):
8316 """Ensure faulty disks abort the opcode or at least warn."""
8318 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8320 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8321 " node '%s'" % (instance.name, node_name),
8323 except errors.OpPrereqError, err:
8324 if self.op.ignore_consistency:
8325 self.proc.LogWarning(str(err.args[0]))
8329 def CheckPrereq(self):
8330 """Check prerequisites.
8333 # Check whether any instance on this node has faulty disks
8334 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8335 if not inst.admin_up:
8337 check_nodes = set(inst.all_nodes)
8338 check_nodes.discard(self.op.node_name)
8339 for inst_node_name in check_nodes:
8340 self._CheckFaultyDisks(inst, inst_node_name)
8342 def Exec(self, feedback_fn):
8343 feedback_fn("Repairing storage unit '%s' on %s ..." %
8344 (self.op.name, self.op.node_name))
8346 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8347 result = self.rpc.call_storage_execute(self.op.node_name,
8348 self.op.storage_type, st_args,
8350 constants.SO_FIX_CONSISTENCY)
8351 result.Raise("Failed to repair storage unit '%s' on %s" %
8352 (self.op.name, self.op.node_name))
8355 class LUNodeEvacuationStrategy(NoHooksLU):
8356 """Computes the node evacuation strategy.
8360 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8361 ("remote_node", None, _TMaybeString),
8362 ("iallocator", None, _TMaybeString),
8366 def CheckArguments(self):
8367 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8369 def ExpandNames(self):
8370 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8371 self.needed_locks = locks = {}
8372 if self.op.remote_node is None:
8373 locks[locking.LEVEL_NODE] = locking.ALL_SET
8375 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8376 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8378 def Exec(self, feedback_fn):
8379 if self.op.remote_node is not None:
8381 for node in self.op.nodes:
8382 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8385 if i.primary_node == self.op.remote_node:
8386 raise errors.OpPrereqError("Node %s is the primary node of"
8387 " instance %s, cannot use it as"
8389 (self.op.remote_node, i.name),
8391 result.append([i.name, self.op.remote_node])
8393 ial = IAllocator(self.cfg, self.rpc,
8394 mode=constants.IALLOCATOR_MODE_MEVAC,
8395 evac_nodes=self.op.nodes)
8396 ial.Run(self.op.iallocator, validate=True)
8398 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8404 class LUGrowDisk(LogicalUnit):
8405 """Grow a disk of an instance.
8409 HTYPE = constants.HTYPE_INSTANCE
8412 ("disk", _NoDefault, _TInt),
8413 ("amount", _NoDefault, _TInt),
8414 ("wait_for_sync", True, _TBool),
8418 def ExpandNames(self):
8419 self._ExpandAndLockInstance()
8420 self.needed_locks[locking.LEVEL_NODE] = []
8421 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8423 def DeclareLocks(self, level):
8424 if level == locking.LEVEL_NODE:
8425 self._LockInstancesNodes()
8427 def BuildHooksEnv(self):
8430 This runs on the master, the primary and all the secondaries.
8434 "DISK": self.op.disk,
8435 "AMOUNT": self.op.amount,
8437 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8438 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8441 def CheckPrereq(self):
8442 """Check prerequisites.
8444 This checks that the instance is in the cluster.
8447 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8448 assert instance is not None, \
8449 "Cannot retrieve locked instance %s" % self.op.instance_name
8450 nodenames = list(instance.all_nodes)
8451 for node in nodenames:
8452 _CheckNodeOnline(self, node)
8454 self.instance = instance
8456 if instance.disk_template not in constants.DTS_GROWABLE:
8457 raise errors.OpPrereqError("Instance's disk layout does not support"
8458 " growing.", errors.ECODE_INVAL)
8460 self.disk = instance.FindDisk(self.op.disk)
8462 if instance.disk_template != constants.DT_FILE:
8463 # TODO: check the free disk space for file, when that feature will be
8465 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8467 def Exec(self, feedback_fn):
8468 """Execute disk grow.
8471 instance = self.instance
8474 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8476 raise errors.OpExecError("Cannot activate block device to grow")
8478 for node in instance.all_nodes:
8479 self.cfg.SetDiskID(disk, node)
8480 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8481 result.Raise("Grow request failed on node %s" % node)
8483 # TODO: Rewrite code to work properly
8484 # DRBD goes into sync mode for a short amount of time after executing the
8485 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8486 # calling "resize" in sync mode fails. Sleeping for a short amount of
8487 # time is a work-around.
8490 disk.RecordGrow(self.op.amount)
8491 self.cfg.Update(instance, feedback_fn)
8492 if self.op.wait_for_sync:
8493 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8495 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8496 " status.\nPlease check the instance.")
8497 if not instance.admin_up:
8498 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8499 elif not instance.admin_up:
8500 self.proc.LogWarning("Not shutting down the disk even though the instance"
8501 " is not supposed to be running, because wait-for-sync"
8502 " mode was not requested.")
8505 class LUQueryInstanceData(NoHooksLU):
8506 """Query runtime instance data.
8510 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8511 ("static", False, _TBool),
8515 def ExpandNames(self):
8516 self.needed_locks = {}
8517 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8519 if self.op.instances:
8520 self.wanted_names = []
8521 for name in self.op.instances:
8522 full_name = _ExpandInstanceName(self.cfg, name)
8523 self.wanted_names.append(full_name)
8524 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8526 self.wanted_names = None
8527 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8529 self.needed_locks[locking.LEVEL_NODE] = []
8530 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8532 def DeclareLocks(self, level):
8533 if level == locking.LEVEL_NODE:
8534 self._LockInstancesNodes()
8536 def CheckPrereq(self):
8537 """Check prerequisites.
8539 This only checks the optional instance list against the existing names.
8542 if self.wanted_names is None:
8543 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8545 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8546 in self.wanted_names]
8548 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8549 """Returns the status of a block device
8552 if self.op.static or not node:
8555 self.cfg.SetDiskID(dev, node)
8557 result = self.rpc.call_blockdev_find(node, dev)
8561 result.Raise("Can't compute disk status for %s" % instance_name)
8563 status = result.payload
8567 return (status.dev_path, status.major, status.minor,
8568 status.sync_percent, status.estimated_time,
8569 status.is_degraded, status.ldisk_status)
8571 def _ComputeDiskStatus(self, instance, snode, dev):
8572 """Compute block device status.
8575 if dev.dev_type in constants.LDS_DRBD:
8576 # we change the snode then (otherwise we use the one passed in)
8577 if dev.logical_id[0] == instance.primary_node:
8578 snode = dev.logical_id[1]
8580 snode = dev.logical_id[0]
8582 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8584 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8587 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8588 for child in dev.children]
8593 "iv_name": dev.iv_name,
8594 "dev_type": dev.dev_type,
8595 "logical_id": dev.logical_id,
8596 "physical_id": dev.physical_id,
8597 "pstatus": dev_pstatus,
8598 "sstatus": dev_sstatus,
8599 "children": dev_children,
8606 def Exec(self, feedback_fn):
8607 """Gather and return data"""
8610 cluster = self.cfg.GetClusterInfo()
8612 for instance in self.wanted_instances:
8613 if not self.op.static:
8614 remote_info = self.rpc.call_instance_info(instance.primary_node,
8616 instance.hypervisor)
8617 remote_info.Raise("Error checking node %s" % instance.primary_node)
8618 remote_info = remote_info.payload
8619 if remote_info and "state" in remote_info:
8622 remote_state = "down"
8625 if instance.admin_up:
8628 config_state = "down"
8630 disks = [self._ComputeDiskStatus(instance, None, device)
8631 for device in instance.disks]
8634 "name": instance.name,
8635 "config_state": config_state,
8636 "run_state": remote_state,
8637 "pnode": instance.primary_node,
8638 "snodes": instance.secondary_nodes,
8640 # this happens to be the same format used for hooks
8641 "nics": _NICListToTuple(self, instance.nics),
8642 "disk_template": instance.disk_template,
8644 "hypervisor": instance.hypervisor,
8645 "network_port": instance.network_port,
8646 "hv_instance": instance.hvparams,
8647 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8648 "be_instance": instance.beparams,
8649 "be_actual": cluster.FillBE(instance),
8650 "os_instance": instance.osparams,
8651 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8652 "serial_no": instance.serial_no,
8653 "mtime": instance.mtime,
8654 "ctime": instance.ctime,
8655 "uuid": instance.uuid,
8658 result[instance.name] = idict
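# The value stored for each instance is a dict along these lines (shortened;
# values are hypothetical):
#
#   {
#     "name": "instance1.example.com",
#     "config_state": "up",            # from the configuration (admin_up)
#     "run_state": "up",               # from the live query ("down" or static otherwise)
#     "pnode": "node1.example.com",
#     "snodes": ["node2.example.com"],
#     "disk_template": "drbd",
#     "disks": [...],                  # per-disk dicts from _ComputeDiskStatus
#     "hv_actual": {...},              # filled hypervisor parameters
#     ...
#   }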
8663 class LUSetInstanceParams(LogicalUnit):
8664 """Modifies an instances's parameters.
8667 HPATH = "instance-modify"
8668 HTYPE = constants.HTYPE_INSTANCE
8671 ("nics", _EmptyList, _TList),
8672 ("disks", _EmptyList, _TList),
8673 ("beparams", _EmptyDict, _TDict),
8674 ("hvparams", _EmptyDict, _TDict),
8675 ("disk_template", None, _TMaybeString),
8676 ("remote_node", None, _TMaybeString),
8677 ("os_name", None, _TMaybeString),
8678 ("force_variant", False, _TBool),
8679 ("osparams", None, _TOr(_TDict, _TNone)),
8684 def CheckArguments(self):
8685 if not (self.op.nics or self.op.disks or self.op.disk_template or
8686 self.op.hvparams or self.op.beparams or self.op.os_name):
8687 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8689 if self.op.hvparams:
8690 _CheckGlobalHvParams(self.op.hvparams)
8694 for disk_op, disk_dict in self.op.disks:
8695 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8696 if disk_op == constants.DDM_REMOVE:
8699 elif disk_op == constants.DDM_ADD:
8702 if not isinstance(disk_op, int):
8703 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8704 if not isinstance(disk_dict, dict):
8705 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8706 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8708 if disk_op == constants.DDM_ADD:
8709 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8710 if mode not in constants.DISK_ACCESS_SET:
8711 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8713 size = disk_dict.get('size', None)
8715 raise errors.OpPrereqError("Required disk parameter size missing",
8719 except (TypeError, ValueError), err:
8720 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8721 str(err), errors.ECODE_INVAL)
8722 disk_dict['size'] = size
8724 # modification of disk
8725 if 'size' in disk_dict:
8726 raise errors.OpPrereqError("Disk size change not possible, use"
8727 " grow-disk", errors.ECODE_INVAL)
8729 if disk_addremove > 1:
8730 raise errors.OpPrereqError("Only one disk add or remove operation"
8731 " supported at a time", errors.ECODE_INVAL)
8733 if self.op.disks and self.op.disk_template is not None:
8734 raise errors.OpPrereqError("Disk template conversion and other disk"
8735 " changes not supported at the same time",
8738 if self.op.disk_template:
8739 _CheckDiskTemplate(self.op.disk_template)
8740 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8741 self.op.remote_node is None):
8742 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8743 " one requires specifying a secondary node",
8748 for nic_op, nic_dict in self.op.nics:
8749 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8750 if nic_op == constants.DDM_REMOVE:
8753 elif nic_op == constants.DDM_ADD:
8756 if not isinstance(nic_op, int):
8757 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8758 if not isinstance(nic_dict, dict):
8759 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8762 # nic_dict should be a dict
8763 nic_ip = nic_dict.get('ip', None)
8764 if nic_ip is not None:
8765 if nic_ip.lower() == constants.VALUE_NONE:
8766 nic_dict['ip'] = None
8768 if not netutils.IPAddress.IsValid(nic_ip):
8769 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8772 nic_bridge = nic_dict.get('bridge', None)
8773 nic_link = nic_dict.get('link', None)
8774 if nic_bridge and nic_link:
8775 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8776 " at the same time", errors.ECODE_INVAL)
8777 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8778 nic_dict['bridge'] = None
8779 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8780 nic_dict['link'] = None
8782 if nic_op == constants.DDM_ADD:
8783 nic_mac = nic_dict.get('mac', None)
8785 nic_dict['mac'] = constants.VALUE_AUTO
8787 if 'mac' in nic_dict:
8788 nic_mac = nic_dict['mac']
8789 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8790 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8792 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8793 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8794 " modifying an existing nic",
8797 if nic_addremove > 1:
8798 raise errors.OpPrereqError("Only one NIC add or remove operation"
8799 " supported at a time", errors.ECODE_INVAL)
8801 def ExpandNames(self):
8802 self._ExpandAndLockInstance()
8803 self.needed_locks[locking.LEVEL_NODE] = []
8804 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8806 def DeclareLocks(self, level):
8807 if level == locking.LEVEL_NODE:
8808 self._LockInstancesNodes()
8809 if self.op.disk_template and self.op.remote_node:
8810 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8811 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8813 def BuildHooksEnv(self):
8816 This runs on the master, primary and secondaries.
8820 if constants.BE_MEMORY in self.be_new:
8821 args['memory'] = self.be_new[constants.BE_MEMORY]
8822 if constants.BE_VCPUS in self.be_new:
8823 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8824 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8825 # information at all.
8828 nic_override = dict(self.op.nics)
8829 for idx, nic in enumerate(self.instance.nics):
8830 if idx in nic_override:
8831 this_nic_override = nic_override[idx]
8833 this_nic_override = {}
8834 if 'ip' in this_nic_override:
8835 ip = this_nic_override['ip']
8838 if 'mac' in this_nic_override:
8839 mac = this_nic_override['mac']
8842 if idx in self.nic_pnew:
8843 nicparams = self.nic_pnew[idx]
8845 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8846 mode = nicparams[constants.NIC_MODE]
8847 link = nicparams[constants.NIC_LINK]
8848 args['nics'].append((ip, mac, mode, link))
8849 if constants.DDM_ADD in nic_override:
8850 ip = nic_override[constants.DDM_ADD].get('ip', None)
8851 mac = nic_override[constants.DDM_ADD]['mac']
8852 nicparams = self.nic_pnew[constants.DDM_ADD]
8853 mode = nicparams[constants.NIC_MODE]
8854 link = nicparams[constants.NIC_LINK]
8855 args['nics'].append((ip, mac, mode, link))
8856 elif constants.DDM_REMOVE in nic_override:
8857 del args['nics'][-1]
8859 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8860 if self.op.disk_template:
8861 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8862 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8865 def CheckPrereq(self):
8866 """Check prerequisites.
8868 This only checks the instance list against the existing names.
8871 # checking the new params on the primary/secondary nodes
8873 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8874 cluster = self.cluster = self.cfg.GetClusterInfo()
8875 assert self.instance is not None, \
8876 "Cannot retrieve locked instance %s" % self.op.instance_name
8877 pnode = instance.primary_node
8878 nodelist = list(instance.all_nodes)
8881 if self.op.os_name and not self.op.force:
8882 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8883 self.op.force_variant)
8884 instance_os = self.op.os_name
8886 instance_os = instance.os
8888 if self.op.disk_template:
8889 if instance.disk_template == self.op.disk_template:
8890 raise errors.OpPrereqError("Instance already has disk template %s" %
8891 instance.disk_template, errors.ECODE_INVAL)
8893 if (instance.disk_template,
8894 self.op.disk_template) not in self._DISK_CONVERSIONS:
8895 raise errors.OpPrereqError("Unsupported disk template conversion from"
8896 " %s to %s" % (instance.disk_template,
8897 self.op.disk_template),
8899 _CheckInstanceDown(self, instance, "cannot change disk template")
8900 if self.op.disk_template in constants.DTS_NET_MIRROR:
8901 if self.op.remote_node == pnode:
8902 raise errors.OpPrereqError("Given new secondary node %s is the same"
8903 " as the primary node of the instance" %
8904 self.op.remote_node, errors.ECODE_STATE)
8905 _CheckNodeOnline(self, self.op.remote_node)
8906 _CheckNodeNotDrained(self, self.op.remote_node)
8907 disks = [{"size": d.size} for d in instance.disks]
8908 required = _ComputeDiskSize(self.op.disk_template, disks)
8909 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8911 # hvparams processing
8912 if self.op.hvparams:
8913 hv_type = instance.hypervisor
8914 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8915 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8916 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8919 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8920 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8921 self.hv_new = hv_new # the new actual values
8922 self.hv_inst = i_hvdict # the new dict (without defaults)
8924 self.hv_new = self.hv_inst = {}
8926 # beparams processing
8927 if self.op.beparams:
8928 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8930 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8931 be_new = cluster.SimpleFillBE(i_bedict)
8932 self.be_new = be_new # the new actual values
8933 self.be_inst = i_bedict # the new dict (without defaults)
8935 self.be_new = self.be_inst = {}
8937 # osparams processing
8938 if self.op.osparams:
8939 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8940 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8941 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8942 self.os_inst = i_osdict # the new dict (without defaults)
8944 self.os_new = self.os_inst = {}
8948 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8949 mem_check_list = [pnode]
8950 if be_new[constants.BE_AUTO_BALANCE]:
8951 # either we changed auto_balance to yes or it was from before
8952 mem_check_list.extend(instance.secondary_nodes)
8953 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8954 instance.hypervisor)
8955 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8956 instance.hypervisor)
8957 pninfo = nodeinfo[pnode]
8958 msg = pninfo.fail_msg
8960 # Assume the primary node is unreachable and go ahead
8961 self.warn.append("Can't get info from primary node %s: %s" %
8963 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8964 self.warn.append("Node data from primary node %s doesn't contain"
8965 " free memory information" % pnode)
8966 elif instance_info.fail_msg:
8967 self.warn.append("Can't get instance runtime information: %s" %
8968 instance_info.fail_msg)
8970 if instance_info.payload:
8971 current_mem = int(instance_info.payload['memory'])
8973 # Assume instance not running
8974 # (there is a slight race condition here, but it's not very probable,
8975 # and we have no other way to check)
8977 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8978 pninfo.payload['memory_free'])
8980 raise errors.OpPrereqError("This change will prevent the instance"
8981 " from starting, due to %d MB of memory"
8982 " missing on its primary node" % miss_mem,
8985 if be_new[constants.BE_AUTO_BALANCE]:
8986 for node, nres in nodeinfo.items():
8987 if node not in instance.secondary_nodes:
8991 self.warn.append("Can't get info from secondary node %s: %s" %
8993 elif not isinstance(nres.payload.get('memory_free', None), int):
8994 self.warn.append("Secondary node %s didn't return free"
8995 " memory information" % node)
8996 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8997 self.warn.append("Not enough memory to failover instance to"
8998 " secondary node %s" % node)
9003 for nic_op, nic_dict in self.op.nics:
9004 if nic_op == constants.DDM_REMOVE:
9005 if not instance.nics:
9006 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9009 if nic_op != constants.DDM_ADD:
9011 if not instance.nics:
9012 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9013 " no NICs" % nic_op,
9015 if nic_op < 0 or nic_op >= len(instance.nics):
9016 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9018 (nic_op, len(instance.nics) - 1),
9020 old_nic_params = instance.nics[nic_op].nicparams
9021 old_nic_ip = instance.nics[nic_op].ip
9026 update_params_dict = dict([(key, nic_dict[key])
9027 for key in constants.NICS_PARAMETERS
9028 if key in nic_dict])
9030 if 'bridge' in nic_dict:
9031 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9033 new_nic_params = _GetUpdatedParams(old_nic_params,
9035 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9036 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9037 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9038 self.nic_pinst[nic_op] = new_nic_params
9039 self.nic_pnew[nic_op] = new_filled_nic_params
9040 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9042 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9043 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9044 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9046 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9048 self.warn.append(msg)
9050 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9051 if new_nic_mode == constants.NIC_MODE_ROUTED:
9052 if 'ip' in nic_dict:
9053 nic_ip = nic_dict['ip']
9057 raise errors.OpPrereqError('Cannot set the nic ip to None'
9058 ' on a routed nic', errors.ECODE_INVAL)
9059 if 'mac' in nic_dict:
9060 nic_mac = nic_dict['mac']
9062 raise errors.OpPrereqError('Cannot set the nic mac to None',
9064 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9065 # otherwise generate the mac
9066 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9068 # or validate/reserve the current one
9070 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9071 except errors.ReservationError:
9072 raise errors.OpPrereqError("MAC address %s already in use"
9073 " in cluster" % nic_mac,
9074 errors.ECODE_NOTUNIQUE)
9077 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9078 raise errors.OpPrereqError("Disk operations not supported for"
9079 " diskless instances",
9081 for disk_op, _ in self.op.disks:
9082 if disk_op == constants.DDM_REMOVE:
9083 if len(instance.disks) == 1:
9084 raise errors.OpPrereqError("Cannot remove the last disk of"
9085 " an instance", errors.ECODE_INVAL)
9086 _CheckInstanceDown(self, instance, "cannot remove disks")
9088 if (disk_op == constants.DDM_ADD and
9089 len(instance.nics) >= constants.MAX_DISKS):
9090 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9091 " add more" % constants.MAX_DISKS,
9093 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9095 if disk_op < 0 or disk_op >= len(instance.disks):
9096 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9098 (disk_op, len(instance.disks)),
9103 def _ConvertPlainToDrbd(self, feedback_fn):
9104 """Converts an instance from plain to drbd.
9107 feedback_fn("Converting template to drbd")
9108 instance = self.instance
9109 pnode = instance.primary_node
9110 snode = self.op.remote_node
9112 # create a fake disk info for _GenerateDiskTemplate
9113 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9114 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9115 instance.name, pnode, [snode],
9116 disk_info, None, None, 0)
9117 info = _GetInstanceInfoText(instance)
9118 feedback_fn("Creating aditional volumes...")
9119 # first, create the missing data and meta devices
9120 for disk in new_disks:
9121 # unfortunately this is... not too nice
9122 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9124 for child in disk.children:
9125 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9126 # at this stage, all new LVs have been created, we can rename the old ones
9128 feedback_fn("Renaming original volumes...")
9129 rename_list = [(o, n.children[0].logical_id)
9130 for (o, n) in zip(instance.disks, new_disks)]
9131 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9132 result.Raise("Failed to rename original LVs")
9134 feedback_fn("Initializing DRBD devices...")
9135 # all child devices are in place, we can now create the DRBD devices
9136 for disk in new_disks:
9137 for node in [pnode, snode]:
9138 f_create = node == pnode
9139 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9141 # at this point, the instance has been modified
9142 instance.disk_template = constants.DT_DRBD8
9143 instance.disks = new_disks
9144 self.cfg.Update(instance, feedback_fn)
9146 # disks are created, waiting for sync
9147 disk_abort = not _WaitForSync(self, instance)
9149 raise errors.OpExecError("There are some degraded disks for"
9150 " this instance, please cleanup manually")
9152 def _ConvertDrbdToPlain(self, feedback_fn):
9153 """Converts an instance from drbd to plain.
9156 instance = self.instance
9157 assert len(instance.secondary_nodes) == 1
9158 pnode = instance.primary_node
9159 snode = instance.secondary_nodes[0]
9160 feedback_fn("Converting template to plain")
9162 old_disks = instance.disks
9163 new_disks = [d.children[0] for d in old_disks]
9165 # copy over size and mode
9166 for parent, child in zip(old_disks, new_disks):
9167 child.size = parent.size
9168 child.mode = parent.mode
9170 # update instance structure
9171 instance.disks = new_disks
9172 instance.disk_template = constants.DT_PLAIN
9173 self.cfg.Update(instance, feedback_fn)
9175 feedback_fn("Removing volumes on the secondary node...")
9176 for disk in old_disks:
9177 self.cfg.SetDiskID(disk, snode)
9178 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9180 self.LogWarning("Could not remove block device %s on node %s,"
9181 " continuing anyway: %s", disk.iv_name, snode, msg)
9183 feedback_fn("Removing unneeded volumes on the primary node...")
9184 for idx, disk in enumerate(old_disks):
9185 meta = disk.children[1]
9186 self.cfg.SetDiskID(meta, pnode)
9187 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9189 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9190 " continuing anyway: %s", idx, pnode, msg)
9193 def Exec(self, feedback_fn):
9194 """Modifies an instance.
9196 All parameters take effect only at the next restart of the instance.
9199 # Process here the warnings from CheckPrereq, as we don't have a
9200 # feedback_fn there.
9201 for warn in self.warn:
9202 feedback_fn("WARNING: %s" % warn)
9205 instance = self.instance
9207 for disk_op, disk_dict in self.op.disks:
9208 if disk_op == constants.DDM_REMOVE:
9209 # remove the last disk
9210 device = instance.disks.pop()
9211 device_idx = len(instance.disks)
9212 for node, disk in device.ComputeNodeTree(instance.primary_node):
9213 self.cfg.SetDiskID(disk, node)
9214 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9216 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9217 " continuing anyway", device_idx, node, msg)
9218 result.append(("disk/%d" % device_idx, "remove"))
9219 elif disk_op == constants.DDM_ADD:
9221 if instance.disk_template == constants.DT_FILE:
9222 file_driver, file_path = instance.disks[0].logical_id
9223 file_path = os.path.dirname(file_path)
9225 file_driver = file_path = None
9226 disk_idx_base = len(instance.disks)
9227 new_disk = _GenerateDiskTemplate(self,
9228 instance.disk_template,
9229 instance.name, instance.primary_node,
9230 instance.secondary_nodes,
9235 instance.disks.append(new_disk)
9236 info = _GetInstanceInfoText(instance)
9238 logging.info("Creating volume %s for instance %s",
9239 new_disk.iv_name, instance.name)
9240 # Note: this needs to be kept in sync with _CreateDisks
9242 for node in instance.all_nodes:
9243 f_create = node == instance.primary_node
9245 _CreateBlockDev(self, node, instance, new_disk,
9246 f_create, info, f_create)
9247 except errors.OpExecError, err:
9248 self.LogWarning("Failed to create volume %s (%s) on"
9250 new_disk.iv_name, new_disk, node, err)
9251 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9252 (new_disk.size, new_disk.mode)))
9254 # change a given disk
9255 instance.disks[disk_op].mode = disk_dict['mode']
9256 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9258 if self.op.disk_template:
9259 r_shut = _ShutdownInstanceDisks(self, instance)
9261 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9262 " proceed with disk template conversion")
9263 mode = (instance.disk_template, self.op.disk_template)
9265 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9267 self.cfg.ReleaseDRBDMinors(instance.name)
9269 result.append(("disk_template", self.op.disk_template))
9272 for nic_op, nic_dict in self.op.nics:
9273 if nic_op == constants.DDM_REMOVE:
9274 # remove the last nic
9275 del instance.nics[-1]
9276 result.append(("nic.%d" % len(instance.nics), "remove"))
9277 elif nic_op == constants.DDM_ADD:
9278 # mac and bridge should be set by now
9279 mac = nic_dict['mac']
9280 ip = nic_dict.get('ip', None)
9281 nicparams = self.nic_pinst[constants.DDM_ADD]
9282 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9283 instance.nics.append(new_nic)
9284 result.append(("nic.%d" % (len(instance.nics) - 1),
9285 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9286 (new_nic.mac, new_nic.ip,
9287 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9288 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9291 for key in 'mac', 'ip':
9293 setattr(instance.nics[nic_op], key, nic_dict[key])
9294 if nic_op in self.nic_pinst:
9295 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9296 for key, val in nic_dict.iteritems():
9297 result.append(("nic.%s/%d" % (key, nic_op), val))
9300 if self.op.hvparams:
9301 instance.hvparams = self.hv_inst
9302 for key, val in self.op.hvparams.iteritems():
9303 result.append(("hv/%s" % key, val))
9306 if self.op.beparams:
9307 instance.beparams = self.be_inst
9308 for key, val in self.op.beparams.iteritems():
9309 result.append(("be/%s" % key, val))
9313 instance.os = self.op.os_name
9316 if self.op.osparams:
9317 instance.osparams = self.os_inst
9318 for key, val in self.op.osparams.iteritems():
9319 result.append(("os/%s" % key, val))
9321 self.cfg.Update(instance, feedback_fn)
9325 _DISK_CONVERSIONS = {
9326 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9327 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9331 class LUQueryExports(NoHooksLU):
9332 """Query the exports list
9336 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9337 ("use_locking", False, _TBool),
9341 def ExpandNames(self):
9342 self.needed_locks = {}
9343 self.share_locks[locking.LEVEL_NODE] = 1
9344 if not self.op.nodes:
9345 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9347 self.needed_locks[locking.LEVEL_NODE] = \
9348 _GetWantedNodes(self, self.op.nodes)
9350 def Exec(self, feedback_fn):
9351 """Compute the list of all the exported system images.
9354 @return: a dictionary with the structure node->(export-list)
9355 where export-list is a list of the instances exported on
9359 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9360 rpcresult = self.rpc.call_export_list(self.nodes)
9362 for node in rpcresult:
9363 if rpcresult[node].fail_msg:
9364 result[node] = False
9366 result[node] = rpcresult[node].payload
9371 class LUPrepareExport(NoHooksLU):
9372 """Prepares an instance for an export and returns useful information.
9377 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9381 def ExpandNames(self):
9382 self._ExpandAndLockInstance()
9384 def CheckPrereq(self):
9385 """Check prerequisites.
9388 instance_name = self.op.instance_name
9390 self.instance = self.cfg.GetInstanceInfo(instance_name)
9391 assert self.instance is not None, \
9392 "Cannot retrieve locked instance %s" % self.op.instance_name
9393 _CheckNodeOnline(self, self.instance.primary_node)
9395 self._cds = _GetClusterDomainSecret()
9397 def Exec(self, feedback_fn):
9398 """Prepares an instance for an export.
9401 instance = self.instance
9403 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9404 salt = utils.GenerateSecret(8)
9406 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9407 result = self.rpc.call_x509_cert_create(instance.primary_node,
9408 constants.RIE_CERT_VALIDITY)
9409 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9411 (name, cert_pem) = result.payload
9413 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9417 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9418 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9420 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9426 class LUExportInstance(LogicalUnit):
9427 """Export an instance to an image in the cluster.
9430 HPATH = "instance-export"
9431 HTYPE = constants.HTYPE_INSTANCE
9434 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9435 ("shutdown", True, _TBool),
9437 ("remove_instance", False, _TBool),
9438 ("ignore_remove_failures", False, _TBool),
9439 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9440 ("x509_key_name", None, _TOr(_TList, _TNone)),
9441 ("destination_x509_ca", None, _TMaybeString),
9445 def CheckArguments(self):
9446 """Check the arguments.
9449 self.x509_key_name = self.op.x509_key_name
9450 self.dest_x509_ca_pem = self.op.destination_x509_ca
9452 if self.op.remove_instance and not self.op.shutdown:
9453 raise errors.OpPrereqError("Can not remove instance without shutting it"
9456 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9457 if not self.x509_key_name:
9458 raise errors.OpPrereqError("Missing X509 key name for encryption",
9461 if not self.dest_x509_ca_pem:
9462 raise errors.OpPrereqError("Missing destination X509 CA",
9465 def ExpandNames(self):
9466 self._ExpandAndLockInstance()
9468 # Lock all nodes for local exports
9469 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9470 # FIXME: lock only instance primary and destination node
9472 # Sad but true, for now we have to lock all nodes, as we don't know where
9473 # the previous export might be, and in this LU we search for it and
9474 # remove it from its current node. In the future we could fix this by:
9475 # - making a tasklet to search (share-lock all), then create the
9476 # new one, then one to remove, after
9477 # - removing the removal operation altogether
9478 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9480 def DeclareLocks(self, level):
9481 """Last minute lock declaration."""
9482 # All nodes are locked anyway, so nothing to do here.
9484 def BuildHooksEnv(self):
9487 This will run on the master, primary node and target node.
9491 "EXPORT_MODE": self.op.mode,
9492 "EXPORT_NODE": self.op.target_node,
9493 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9494 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9495 # TODO: Generic function for boolean env variables
9496 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9499 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9501 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9503 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9504 nl.append(self.op.target_node)
9508 def CheckPrereq(self):
9509 """Check prerequisites.
9511 This checks that the instance and node names are valid.
9514 instance_name = self.op.instance_name
9516 self.instance = self.cfg.GetInstanceInfo(instance_name)
9517 assert self.instance is not None, \
9518 "Cannot retrieve locked instance %s" % self.op.instance_name
9519 _CheckNodeOnline(self, self.instance.primary_node)
9521 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9522 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9523 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9524 assert self.dst_node is not None
9526 _CheckNodeOnline(self, self.dst_node.name)
9527 _CheckNodeNotDrained(self, self.dst_node.name)
9530 self.dest_disk_info = None
9531 self.dest_x509_ca = None
9533 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9534 self.dst_node = None
9536 if len(self.op.target_node) != len(self.instance.disks):
9537 raise errors.OpPrereqError(("Received destination information for %s"
9538 " disks, but instance %s has %s disks") %
9539 (len(self.op.target_node), instance_name,
9540 len(self.instance.disks)),
9543 cds = _GetClusterDomainSecret()
9545 # Check X509 key name
9547 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9548 except (TypeError, ValueError), err:
9549 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9551 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9552 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9555 # Load and verify CA
9557 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9558 except OpenSSL.crypto.Error, err:
9559 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9560 (err, ), errors.ECODE_INVAL)
9562 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9563 if errcode is not None:
9564 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9565 (msg, ), errors.ECODE_INVAL)
9567 self.dest_x509_ca = cert
9569 # Verify target information
9571 for idx, disk_data in enumerate(self.op.target_node):
9573 (host, port, magic) = \
9574 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9575 except errors.GenericError, err:
9576 raise errors.OpPrereqError("Target info for disk %s: %s" %
9577 (idx, err), errors.ECODE_INVAL)
9579 disk_info.append((host, port, magic))
9581 assert len(disk_info) == len(self.op.target_node)
9582 self.dest_disk_info = disk_info
9585 raise errors.ProgrammerError("Unhandled export mode %r" %
9588 # instance disk type verification
9589 # TODO: Implement export support for file-based disks
9590 for disk in self.instance.disks:
9591 if disk.dev_type == constants.LD_FILE:
9592 raise errors.OpPrereqError("Export not supported for instances with"
9593 " file-based disks", errors.ECODE_INVAL)
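# Illustrative sketch with hypothetical values, not part of the original
# module: for a remote export, self.dest_disk_info holds one
# (host, port, magic) tuple per instance disk, as returned by
# masterd.instance.CheckRemoteExportDiskInfo above, e.g.
#   [("198.51.100.10", 11000, "magic0"), ("198.51.100.10", 11001, "magic1")]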
9595 def _CleanupExports(self, feedback_fn):
9596 """Removes exports of current instance from all other nodes.
9598 If an instance in a cluster with nodes A..D was exported to node C, its
9599 exports will be removed from the nodes A, B and D.
9602 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9604 nodelist = self.cfg.GetNodeList()
9605 nodelist.remove(self.dst_node.name)
9607 # on one-node clusters nodelist will be empty after the removal
9608 # if we proceeded, the backup would be removed because OpQueryExports
9609 # substitutes an empty list with the full cluster node list.
9610 iname = self.instance.name
9612 feedback_fn("Removing old exports for instance %s" % iname)
9613 exportlist = self.rpc.call_export_list(nodelist)
9614 for node in exportlist:
9615 if exportlist[node].fail_msg:
9617 if iname in exportlist[node].payload:
9618 msg = self.rpc.call_export_remove(node, iname).fail_msg
9620 self.LogWarning("Could not remove older export for instance %s"
9621 " on node %s: %s", iname, node, msg)
9623 def Exec(self, feedback_fn):
9624 """Export an instance to an image in the cluster.
9627 assert self.op.mode in constants.EXPORT_MODES
9629 instance = self.instance
9630 src_node = instance.primary_node
9632 if self.op.shutdown:
9633 # shutdown the instance, but not the disks
9634 feedback_fn("Shutting down instance %s" % instance.name)
9635 result = self.rpc.call_instance_shutdown(src_node, instance,
9636 self.op.shutdown_timeout)
9637 # TODO: Maybe ignore failures if ignore_remove_failures is set
9638 result.Raise("Could not shutdown instance %s on"
9639 " node %s" % (instance.name, src_node))
9641 # set the disk IDs correctly since call_instance_start needs the
9642 # correct drbd minor to create the symlinks
9643 for disk in instance.disks:
9644 self.cfg.SetDiskID(disk, src_node)
9646 activate_disks = (not instance.admin_up)
9649 # Activate the instance disks if we're exporting a stopped instance
9650 feedback_fn("Activating disks for %s" % instance.name)
9651 _StartInstanceDisks(self, instance, None)
9654 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9657 helper.CreateSnapshots()
9659 if (self.op.shutdown and instance.admin_up and
9660 not self.op.remove_instance):
9661 assert not activate_disks
9662 feedback_fn("Starting instance %s" % instance.name)
9663 result = self.rpc.call_instance_start(src_node, instance, None, None)
9664 msg = result.fail_msg
9666 feedback_fn("Failed to start instance: %s" % msg)
9667 _ShutdownInstanceDisks(self, instance)
9668 raise errors.OpExecError("Could not start instance: %s" % msg)
9670 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9671 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9672 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9673 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9674 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9676 (key_name, _, _) = self.x509_key_name
9679 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9682 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9683 key_name, dest_ca_pem,
9688 # Check for backwards compatibility
9689 assert len(dresults) == len(instance.disks)
9690 assert compat.all(isinstance(i, bool) for i in dresults), \
9691 "Not all results are boolean: %r" % dresults
9695 feedback_fn("Deactivating disks for %s" % instance.name)
9696 _ShutdownInstanceDisks(self, instance)
9698 if not (compat.all(dresults) and fin_resu):
9701 failures.append("export finalization")
9702 if not compat.all(dresults):
9703 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9705 failures.append("disk export: disk(s) %s" % fdsk)
9707 raise errors.OpExecError("Export failed, errors in %s" %
9708 utils.CommaJoin(failures))
9710 # At this point the export was successful; we can clean up and finish
9712 # Remove instance if requested
9713 if self.op.remove_instance:
9714 feedback_fn("Removing instance %s" % instance.name)
9715 _RemoveInstance(self, feedback_fn, instance,
9716 self.op.ignore_remove_failures)
9718 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9719 self._CleanupExports(feedback_fn)
9721 return fin_resu, dresults
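# Illustrative sketch, not part of the original module: LUExportInstance.Exec
# returns a (finalization_ok, per_disk_results) pair, e.g. (True, [True, True])
# for a successfully exported two-disk instance; per the asserts above, the
# second element always contains one boolean per instance disk.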
9724 class LURemoveExport(NoHooksLU):
9725 """Remove exports related to the named instance.
9733 def ExpandNames(self):
9734 self.needed_locks = {}
9735 # We need all nodes to be locked in order for RemoveExport to work, but we
9736 # don't need to lock the instance itself, as nothing will happen to it (and
9737 # we can remove exports also for a removed instance)
9738 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9740 def Exec(self, feedback_fn):
9741 """Remove any export.
9744 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9745 # If the instance was not found we'll try with the name that was passed in.
9746 # This will only work if it was an FQDN, though.
9748 if not instance_name:
9750 instance_name = self.op.instance_name
9752 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9753 exportlist = self.rpc.call_export_list(locked_nodes)
9755 for node in exportlist:
9756 msg = exportlist[node].fail_msg
9758 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9760 if instance_name in exportlist[node].payload:
9762 result = self.rpc.call_export_remove(node, instance_name)
9763 msg = result.fail_msg
9765 logging.error("Could not remove export for instance %s"
9766 " on node %s: %s", instance_name, node, msg)
9768 if fqdn_warn and not found:
9769 feedback_fn("Export not found. If trying to remove an export belonging"
9770 " to a deleted instance please use its Fully Qualified"
9774 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9777 This is an abstract class which is the parent of all the other tags LUs.
9781 def ExpandNames(self):
9782 self.needed_locks = {}
9783 if self.op.kind == constants.TAG_NODE:
9784 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9785 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9786 elif self.op.kind == constants.TAG_INSTANCE:
9787 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9788 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9790 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
9791 # not possible to acquire the BGL based on opcode parameters)
9793 def CheckPrereq(self):
9794 """Check prerequisites.
9797 if self.op.kind == constants.TAG_CLUSTER:
9798 self.target = self.cfg.GetClusterInfo()
9799 elif self.op.kind == constants.TAG_NODE:
9800 self.target = self.cfg.GetNodeInfo(self.op.name)
9801 elif self.op.kind == constants.TAG_INSTANCE:
9802 self.target = self.cfg.GetInstanceInfo(self.op.name)
9804 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9805 str(self.op.kind), errors.ECODE_INVAL)
9808 class LUGetTags(TagsLU):
9809 """Returns the tags of a given object.
9813 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9814 # Name is only meaningful for nodes and instances
9815 ("name", _NoDefault, _TMaybeString),
9819 def ExpandNames(self):
9820 TagsLU.ExpandNames(self)
9822 # Share locks as this is only a read operation
9823 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9825 def Exec(self, feedback_fn):
9826 """Returns the tag list.
9829 return list(self.target.GetTags())
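# Illustrative sketch with hypothetical tag values, not part of the original
# module: LUGetTags.Exec returns the object's tags as a plain list of strings,
# e.g. ["environment:production", "owner:team-a"].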
9832 class LUSearchTags(NoHooksLU):
9833 """Searches the tags for a given pattern.
9837 ("pattern", _NoDefault, _TNonEmptyString),
9841 def ExpandNames(self):
9842 self.needed_locks = {}
9844 def CheckPrereq(self):
9845 """Check prerequisites.
9847 This checks the pattern passed for validity by compiling it.
9851 self.re = re.compile(self.op.pattern)
9852 except re.error, err:
9853 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9854 (self.op.pattern, err), errors.ECODE_INVAL)
9856 def Exec(self, feedback_fn):
9857 """Returns the tags matching the search pattern.
9861 tgts = [("/cluster", cfg.GetClusterInfo())]
9862 ilist = cfg.GetAllInstancesInfo().values()
9863 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9864 nlist = cfg.GetAllNodesInfo().values()
9865 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9867 for path, target in tgts:
9868 for tag in target.GetTags():
9869 if self.re.search(tag):
9870 results.append((path, tag))
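# Illustrative sketch with hypothetical names and tags, not part of the
# original module: LUSearchTags returns (path, tag) pairs for every matching
# tag, so a search for the pattern "^owner:" might yield
#   [("/cluster", "owner:admin"),
#    ("/instances/instance1.example.com", "owner:team-a")]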
9874 class LUAddTags(TagsLU):
9875 """Sets a tag on a given object.
9879 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9880 # Name is only meaningful for nodes and instances
9881 ("name", _NoDefault, _TMaybeString),
9882 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9886 def CheckPrereq(self):
9887 """Check prerequisites.
9889 This checks the type and length of each tag to be added.
9892 TagsLU.CheckPrereq(self)
9893 for tag in self.op.tags:
9894 objects.TaggableObject.ValidateTag(tag)
9896 def Exec(self, feedback_fn):
9901 for tag in self.op.tags:
9902 self.target.AddTag(tag)
9903 except errors.TagError, err:
9904 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9905 self.cfg.Update(self.target, feedback_fn)
9908 class LUDelTags(TagsLU):
9909 """Delete a list of tags from a given object.
9913 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9914 # Name is only meaningful for nodes and instances
9915 ("name", _NoDefault, _TMaybeString),
9916 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9920 def CheckPrereq(self):
9921 """Check prerequisites.
9923 This checks that we have the given tag.
9926 TagsLU.CheckPrereq(self)
9927 for tag in self.op.tags:
9928 objects.TaggableObject.ValidateTag(tag)
9929 del_tags = frozenset(self.op.tags)
9930 cur_tags = self.target.GetTags()
9932 diff_tags = del_tags - cur_tags
9934 diff_names = ("'%s'" % i for i in sorted(diff_tags))
9935 raise errors.OpPrereqError("Tag(s) %s not found" %
9936 (utils.CommaJoin(diff_names), ),
9939 def Exec(self, feedback_fn):
9940 """Remove the tag from the object.
9943 for tag in self.op.tags:
9944 self.target.RemoveTag(tag)
9945 self.cfg.Update(self.target, feedback_fn)
9948 class LUTestDelay(NoHooksLU):
9949 """Sleep for a specified amount of time.
9951 This LU sleeps on the master and/or nodes for a specified amount of
9956 ("duration", _NoDefault, _TFloat),
9957 ("on_master", True, _TBool),
9958 ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9959 ("repeat", 0, _TPositiveInt)
9963 def ExpandNames(self):
9964 """Expand names and set required locks.
9966 This expands the node list, if any.
9969 self.needed_locks = {}
9970 if self.op.on_nodes:
9971 # _GetWantedNodes can be used here, but is not always appropriate to use
9972 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9974 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9975 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9977 def _TestDelay(self):
9978 """Do the actual sleep.
9981 if self.op.on_master:
9982 if not utils.TestDelay(self.op.duration):
9983 raise errors.OpExecError("Error during master delay test")
9984 if self.op.on_nodes:
9985 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9986 for node, node_result in result.items():
9987 node_result.Raise("Failure during rpc call to node %s" % node)
9989 def Exec(self, feedback_fn):
9990 """Execute the test delay opcode, with the wanted repetitions.
9993 if self.op.repeat == 0:
9996 top_value = self.op.repeat - 1
9997 for i in range(self.op.repeat):
9998 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
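# Illustrative note derived from the loop above, not part of the original
# module: with repeat=3 the delay runs three times and the iterations are
# logged as "Test delay iteration 0/2", "1/2" and "2/2".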
10002 class LUTestJobqueue(NoHooksLU):
10003 """Utility LU to test some aspects of the job queue.
10007 ("notify_waitlock", False, _TBool),
10008 ("notify_exec", False, _TBool),
10009 ("log_messages", _EmptyList, _TListOf(_TString)),
10010 ("fail", False, _TBool),
10014 # Must be lower than default timeout for WaitForJobChange to see whether it
10015 # notices changed jobs
10016 _CLIENT_CONNECT_TIMEOUT = 20.0
10017 _CLIENT_CONFIRM_TIMEOUT = 60.0
10020 def _NotifyUsingSocket(cls, cb, errcls):
10021 """Opens a Unix socket and waits for another program to connect.
10024 @param cb: Callback to send socket name to client
10025 @type errcls: class
10026 @param errcls: Exception class to use for errors
10029 # Using a temporary directory as there's no easy way to create temporary
10030 # sockets without writing a custom loop around tempfile.mktemp and
10032 tmpdir = tempfile.mkdtemp()
10034 tmpsock = utils.PathJoin(tmpdir, "sock")
10036 logging.debug("Creating temporary socket at %s", tmpsock)
10037 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10042 # Send details to client
10045 # Wait for client to connect before continuing
10046 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10048 (conn, _) = sock.accept()
10049 except socket.error, err:
10050 raise errcls("Client didn't connect in time (%s)" % err)
10054 # Remove as soon as client is connected
10055 shutil.rmtree(tmpdir)
10057 # Wait for client to close
10060 # pylint: disable-msg=E1101
10061 # Instance of '_socketobject' has no ... member
10062 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10064 except socket.error, err:
10065 raise errcls("Client failed to confirm notification (%s)" % err)
10069 def _SendNotification(self, test, arg, sockname):
10070 """Sends a notification to the client.
10073 @param test: Test name
10074 @param arg: Test argument (depends on test)
10075 @type sockname: string
10076 @param sockname: Socket path
10079 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10081 def _Notify(self, prereq, test, arg):
10082 """Notifies the client of a test.
10085 @param prereq: Whether this is a prereq-phase test
10087 @param test: Test name
10088 @param arg: Test argument (depends on test)
10092 errcls = errors.OpPrereqError
10094 errcls = errors.OpExecError
10096 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10100 def CheckArguments(self):
10101 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10102 self.expandnames_calls = 0
10104 def ExpandNames(self):
10105 checkargs_calls = getattr(self, "checkargs_calls", 0)
10106 if checkargs_calls < 1:
10107 raise errors.ProgrammerError("CheckArguments was not called")
10109 self.expandnames_calls += 1
10111 if self.op.notify_waitlock:
10112 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10114 self.LogInfo("Expanding names")
10116 # Get lock on master node (just to get a lock, not for a particular reason)
10117 self.needed_locks = {
10118 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10121 def Exec(self, feedback_fn):
10122 if self.expandnames_calls < 1:
10123 raise errors.ProgrammerError("ExpandNames was not called")
10125 if self.op.notify_exec:
10126 self._Notify(False, constants.JQT_EXEC, None)
10128 self.LogInfo("Executing")
10130 if self.op.log_messages:
10131 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10132 for idx, msg in enumerate(self.op.log_messages):
10133 self.LogInfo("Sending log message %s", idx + 1)
10134 feedback_fn(constants.JQT_MSGPREFIX + msg)
10135 # Report how many test messages have been sent
10136 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10139 raise errors.OpExecError("Opcode failure was requested")
10144 class IAllocator(object):
10145 """IAllocator framework.
10147 An IAllocator instance has four sets of attributes:
10148 - cfg that is needed to query the cluster
10149 - input data (all members of the _KEYS class attribute are required)
10150 - four buffer attributes (in_data, in_text, out_data, out_text) that represent the
10151 input (to the external script) in text and data structure format,
10152 and the output from it, again in two formats
10153 - the result variables from the script (success, info, nodes) for
10157 # pylint: disable-msg=R0902
10158 # lots of instance attributes
10160 "name", "mem_size", "disks", "disk_template",
10161 "os", "tags", "nics", "vcpus", "hypervisor",
10164 "name", "relocate_from",
10170 def __init__(self, cfg, rpc, mode, **kwargs):
10173 # init buffer variables
10174 self.in_text = self.out_text = self.in_data = self.out_data = None
10175 # init all input fields so that pylint is happy
10177 self.mem_size = self.disks = self.disk_template = None
10178 self.os = self.tags = self.nics = self.vcpus = None
10179 self.hypervisor = None
10180 self.relocate_from = None
10182 self.evac_nodes = None
10184 self.required_nodes = None
10185 # init result fields
10186 self.success = self.info = self.result = None
10187 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10188 keyset = self._ALLO_KEYS
10189 fn = self._AddNewInstance
10190 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10191 keyset = self._RELO_KEYS
10192 fn = self._AddRelocateInstance
10193 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10194 keyset = self._EVAC_KEYS
10195 fn = self._AddEvacuateNodes
10197 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10198 " IAllocator" % self.mode)
10200 if key not in keyset:
10201 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10202 " IAllocator" % key)
10203 setattr(self, key, kwargs[key])
10206 if key not in kwargs:
10207 raise errors.ProgrammerError("Missing input parameter '%s' to"
10208 " IAllocator" % key)
10209 self._BuildInputData(fn)
10211 def _ComputeClusterData(self):
10212 """Compute the generic allocator input data.
10214 This is the data that is independent of the actual operation.
10218 cluster_info = cfg.GetClusterInfo()
10221 "version": constants.IALLOCATOR_VERSION,
10222 "cluster_name": cfg.GetClusterName(),
10223 "cluster_tags": list(cluster_info.GetTags()),
10224 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10225 # we don't have job IDs
10227 iinfo = cfg.GetAllInstancesInfo().values()
10228 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10232 node_list = cfg.GetNodeList()
10234 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10235 hypervisor_name = self.hypervisor
10236 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10237 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10238 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10239 hypervisor_name = cluster_info.enabled_hypervisors[0]
10241 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10244 self.rpc.call_all_instances_info(node_list,
10245 cluster_info.enabled_hypervisors)
10246 for nname, nresult in node_data.items():
10247 # first fill in static (config-based) values
10248 ninfo = cfg.GetNodeInfo(nname)
10250 "tags": list(ninfo.GetTags()),
10251 "primary_ip": ninfo.primary_ip,
10252 "secondary_ip": ninfo.secondary_ip,
10253 "offline": ninfo.offline,
10254 "drained": ninfo.drained,
10255 "master_candidate": ninfo.master_candidate,
10258 if not (ninfo.offline or ninfo.drained):
10259 nresult.Raise("Can't get data for node %s" % nname)
10260 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10262 remote_info = nresult.payload
10264 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10265 'vg_size', 'vg_free', 'cpu_total']:
10266 if attr not in remote_info:
10267 raise errors.OpExecError("Node '%s' didn't return attribute"
10268 " '%s'" % (nname, attr))
10269 if not isinstance(remote_info[attr], int):
10270 raise errors.OpExecError("Node '%s' returned invalid value"
10272 (nname, attr, remote_info[attr]))
10273 # compute memory used by primary instances
10274 i_p_mem = i_p_up_mem = 0
10275 for iinfo, beinfo in i_list:
10276 if iinfo.primary_node == nname:
10277 i_p_mem += beinfo[constants.BE_MEMORY]
10278 if iinfo.name not in node_iinfo[nname].payload:
10281 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10282 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10283 remote_info['memory_free'] -= max(0, i_mem_diff)
10286 i_p_up_mem += beinfo[constants.BE_MEMORY]
10288 # compute memory used by instances
10290 "total_memory": remote_info['memory_total'],
10291 "reserved_memory": remote_info['memory_dom0'],
10292 "free_memory": remote_info['memory_free'],
10293 "total_disk": remote_info['vg_size'],
10294 "free_disk": remote_info['vg_free'],
10295 "total_cpus": remote_info['cpu_total'],
10296 "i_pri_memory": i_p_mem,
10297 "i_pri_up_memory": i_p_up_mem,
10299 pnr.update(pnr_dyn)
10301 node_results[nname] = pnr
10302 data["nodes"] = node_results
10306 for iinfo, beinfo in i_list:
10308 for nic in iinfo.nics:
10309 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10310 nic_dict = {"mac": nic.mac,
10312 "mode": filled_params[constants.NIC_MODE],
10313 "link": filled_params[constants.NIC_LINK],
10315 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10316 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10317 nic_data.append(nic_dict)
10319 "tags": list(iinfo.GetTags()),
10320 "admin_up": iinfo.admin_up,
10321 "vcpus": beinfo[constants.BE_VCPUS],
10322 "memory": beinfo[constants.BE_MEMORY],
10324 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10326 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10327 "disk_template": iinfo.disk_template,
10328 "hypervisor": iinfo.hypervisor,
10330 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10332 instance_data[iinfo.name] = pir
10334 data["instances"] = instance_data
10336 self.in_data = data
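# Illustrative sketch, heavily abridged and with hypothetical values, not part
# of the original module: the structure assembled above into self.in_data
# looks roughly like
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": "cluster.example.com",
#    "nodes": {"node1.example.com": {"total_memory": 32768,
#                                    "free_memory": 20480,
#                                    "total_disk": 512000,
#                                    "total_cpus": 8,
#                                    "offline": False, ...}},
#    "instances": {"instance1.example.com": {"memory": 1024,
#                                            "vcpus": 1,
#                                            "disks": [{"size": 10240,
#                                                       "mode": "rw"}],
#                                            "disk_template": "drbd", ...}}}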
10338 def _AddNewInstance(self):
10339 """Add new instance data to allocator structure.
10341 This, in combination with _ComputeClusterData, will create the
10342 correct structure needed as input for the allocator.
10344 The checks for the completeness of the opcode must have already been
10348 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10350 if self.disk_template in constants.DTS_NET_MIRROR:
10351 self.required_nodes = 2
10353 self.required_nodes = 1
10356 "disk_template": self.disk_template,
10359 "vcpus": self.vcpus,
10360 "memory": self.mem_size,
10361 "disks": self.disks,
10362 "disk_space_total": disk_space,
10364 "required_nodes": self.required_nodes,
10368 def _AddRelocateInstance(self):
10369 """Add relocate instance data to allocator structure.
10371 This, in combination with _ComputeClusterData, will create the
10372 correct structure needed as input for the allocator.
10374 The checks for the completeness of the opcode must have already been
10378 instance = self.cfg.GetInstanceInfo(self.name)
10379 if instance is None:
10380 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10381 " IAllocator" % self.name)
10383 if instance.disk_template not in constants.DTS_NET_MIRROR:
10384 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10385 errors.ECODE_INVAL)
10387 if len(instance.secondary_nodes) != 1:
10388 raise errors.OpPrereqError("Instance has not exactly one secondary node",
10389 errors.ECODE_STATE)
10391 self.required_nodes = 1
10392 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10393 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10397 "disk_space_total": disk_space,
10398 "required_nodes": self.required_nodes,
10399 "relocate_from": self.relocate_from,
10403 def _AddEvacuateNodes(self):
10404 """Add evacuate nodes data to allocator structure.
10408 "evac_nodes": self.evac_nodes
10412 def _BuildInputData(self, fn):
10413 """Build input data structures.
10416 self._ComputeClusterData()
10419 request["type"] = self.mode
10420 self.in_data["request"] = request
10422 self.in_text = serializer.Dump(self.in_data)
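# Illustrative sketch with hypothetical values, not part of the original
# module: for an allocation request the "request" section merged above would
# look roughly like
#   {"type": constants.IALLOCATOR_MODE_ALLOC, "memory": 1024, "vcpus": 1,
#    "disk_space_total": 10752, "required_nodes": 2, ...}
# before the whole self.in_data structure is serialized into self.in_text.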
10424 def Run(self, name, validate=True, call_fn=None):
10425 """Run an instance allocator and return the results.
10428 if call_fn is None:
10429 call_fn = self.rpc.call_iallocator_runner
10431 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10432 result.Raise("Failure while running the iallocator script")
10434 self.out_text = result.payload
10436 self._ValidateResult()
10438 def _ValidateResult(self):
10439 """Process the allocator results.
10441 This will process and, if successful, save the result in
10442 self.out_data and the other parameters.
10446 rdict = serializer.Load(self.out_text)
10447 except Exception, err:
10448 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10450 if not isinstance(rdict, dict):
10451 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10453 # TODO: remove backwards compatibility in later versions
10454 if "nodes" in rdict and "result" not in rdict:
10455 rdict["result"] = rdict["nodes"]
10458 for key in "success", "info", "result":
10459 if key not in rdict:
10460 raise errors.OpExecError("Can't parse iallocator results:"
10461 " missing key '%s'" % key)
10462 setattr(self, key, rdict[key])
10464 if not isinstance(rdict["result"], list):
10465 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10467 self.out_data = rdict
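# Illustrative sketch with hypothetical values, not part of the original
# module: a well-formed iallocator reply, once deserialized above, contains at
# least
#   {"success": True, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
# Older scripts may return the node list under "nodes", which is mapped to
# "result" for backwards compatibility.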
10470 class LUTestAllocator(NoHooksLU):
10471 """Run allocator tests.
10473 This LU runs the allocator tests.
10477 ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10478 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10479 ("name", _NoDefault, _TNonEmptyString),
10480 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10481 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10482 _TOr(_TNone, _TNonEmptyString))))),
10483 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10484 ("hypervisor", None, _TMaybeString),
10485 ("allocator", None, _TMaybeString),
10486 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10487 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10488 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10489 ("os", None, _TMaybeString),
10490 ("disk_template", None, _TMaybeString),
10491 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10494 def CheckPrereq(self):
10495 """Check prerequisites.
10497 This checks the opcode parameters depending on the direction and mode of the test.
10500 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10501 for attr in ["mem_size", "disks", "disk_template",
10502 "os", "tags", "nics", "vcpus"]:
10503 if not hasattr(self.op, attr):
10504 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10505 attr, errors.ECODE_INVAL)
10506 iname = self.cfg.ExpandInstanceName(self.op.name)
10507 if iname is not None:
10508 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10509 iname, errors.ECODE_EXISTS)
10510 if not isinstance(self.op.nics, list):
10511 raise errors.OpPrereqError("Invalid parameter 'nics'",
10512 errors.ECODE_INVAL)
10513 if not isinstance(self.op.disks, list):
10514 raise errors.OpPrereqError("Invalid parameter 'disks'",
10515 errors.ECODE_INVAL)
10516 for row in self.op.disks:
10517 if (not isinstance(row, dict) or
10518 "size" not in row or
10519 not isinstance(row["size"], int) or
10520 "mode" not in row or
10521 row["mode"] not in ['r', 'w']):
10522 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10523 " parameter", errors.ECODE_INVAL)
10524 if self.op.hypervisor is None:
10525 self.op.hypervisor = self.cfg.GetHypervisorType()
10526 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10527 fname = _ExpandInstanceName(self.cfg, self.op.name)
10528 self.op.name = fname
10529 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10530 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10531 if not hasattr(self.op, "evac_nodes"):
10532 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10533 " opcode input", errors.ECODE_INVAL)
10535 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10536 self.op.mode, errors.ECODE_INVAL)
10538 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10539 if self.op.allocator is None:
10540 raise errors.OpPrereqError("Missing allocator name",
10541 errors.ECODE_INVAL)
10542 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10543 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10544 self.op.direction, errors.ECODE_INVAL)
10546 def Exec(self, feedback_fn):
10547 """Run the allocator test.
10550 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10551 ial = IAllocator(self.cfg, self.rpc,
10554 mem_size=self.op.mem_size,
10555 disks=self.op.disks,
10556 disk_template=self.op.disk_template,
10560 vcpus=self.op.vcpus,
10561 hypervisor=self.op.hypervisor,
10563 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10564 ial = IAllocator(self.cfg, self.rpc,
10567 relocate_from=list(self.relocate_from),
10569 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10570 ial = IAllocator(self.cfg, self.rpc,
10572 evac_nodes=self.op.evac_nodes)
10574 raise errors.ProgrammerError("Unhandled mode %s in"
10575 " LUTestAllocator.Exec", self.op.mode)
10577 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10578 result = ial.in_text
10580 ial.Run(self.op.allocator, validate=False)
10581 result = ial.out_text