4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 import ganeti.masterd.instance # pylint: disable-msg=W0611
60 # Modifiable default values; need to define these here before the actual LUs
64 """Returns an empty list.
71 """Returns an empty dict.
77 #: The without-default default value
81 #: The no-type (value too complex to check it in the type system)
87 """Checks if the given value is not None.
90 return val is not None
94 """Checks if the given value is None.
101 """Checks if the given value is a boolean.
104 return isinstance(val, bool)
108 """Checks if the given value is an integer.
111 return isinstance(val, int)
115 """Checks if the given value is a float.
118 return isinstance(val, float)
122 """Checks if the given value is a string.
125 return isinstance(val, basestring)
129 """Checks if a given value evaluates to a boolean True value.
135 def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list.
139 return lambda val: val in target_list
144 """Checks if the given value is a list.
147 return isinstance(val, list)
151 """Checks if the given value is a dictionary.
154 return isinstance(val, dict)
157 def _TIsLength(size):
158 """Check is the given container is of the given size.
161 return lambda container: len(container) == size
166 """Combine multiple functions using an AND operation.
170 return compat.all(t(val) for t in args)
175 """Combine multiple functions using an AND operation.
179 return compat.any(t(val) for t in args)
184 """Checks that a modified version of the argument passes the given test.
187 return lambda val: test(fn(val))
192 #: a non-empty string
193 _TNonEmptyString = _TAnd(_TString, _TTrue)
196 #: a maybe non-empty string
197 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
200 #: a maybe boolean (bool or none)
201 _TMaybeBool = _TOr(_TBool, _TNone)
204 #: a non-negative integer
205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
207 #: a strictly positive integer
208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
211 def _TListOf(my_type):
212 """Checks if a given value is a list with all elements of the same type.
216 lambda lst: compat.all(my_type(v) for v in lst))
219 def _TDictOf(key_type, val_type):
220 """Checks a dict type for the type of its key/values.
224 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225 and compat.all(val_type(v)
226 for v in my_dict.values())))
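# Illustrative sketch, not part of the original module: how the small _T*
# validators above compose. _TMaybeDict is a hypothetical name used only for
# this example.
#
#   _TMaybeDict = _TOr(_TDict, _TNone)
#   _TMaybeDict(None)                                  # -> True
#   _TMaybeDict({"a": 1})                              # -> True
#   _TMaybeDict([1, 2])                                # -> False
#   _TListOf(_TNonEmptyString)(["node1", "node2"])     # -> True
#   _TDictOf(_TNonEmptyString, _TInt)({"mem": 128})    # -> True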
229 # Common opcode attributes
231 #: output fields for a query operation
232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
235 #: the shutdown timeout
236 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
239 #: the force parameter
240 _PForce = ("force", False, _TBool)
242 #: a required instance name (for single-instance LUs)
243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
246 #: a required node name (for single-node LUs)
247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
249 #: the migration type (live/non-live)
250 _PMigrationMode = ("mode", None, _TOr(_TNone,
251 _TElemOf(constants.HT_MIGRATION_MODES)))
253 #: the obsolete 'live' mode (boolean)
254 _PMigrationLive = ("live", None, _TMaybeBool)
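# Illustrative sketch, not part of the original module: a logical unit (see
# LogicalUnit below) typically combines the shared parameter tuples above in
# its _OP_PARAMS list; the LU name and the extra parameter shown here are
# hypothetical.
#
#   class LUExampleShutdown(LogicalUnit):
#     _OP_PARAMS = [
#       _PInstanceName,
#       _PShutdownTimeout,
#       ("ignore_errors", False, _TBool),
#     ]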
258 class LogicalUnit(object):
259 """Logical Unit base class.
261 Subclasses must follow these rules:
262 - implement ExpandNames
263 - implement CheckPrereq (except when tasklets are used)
264 - implement Exec (except when tasklets are used)
265 - implement BuildHooksEnv
266 - redefine HPATH and HTYPE
267 - optionally redefine their run requirements:
268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
270 Note that all commands require root permissions.
272 @ivar dry_run_result: the value (if any) that will be returned to the caller
273 in dry-run mode (signalled by opcode dry_run parameter)
274 @cvar _OP_PARAMS: a list of opcode attributes, their default values
275 they should get if not already defined, and types they must match
283 def __init__(self, processor, op, context, rpc):
284 """Constructor for LogicalUnit.
286 This needs to be overridden in derived classes in order to check op validity.
290 self.proc = processor
292 self.cfg = context.cfg
293 self.context = context
295 # Dicts used to declare locking needs to mcpu
296 self.needed_locks = None
297 self.acquired_locks = {}
298 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
300 self.remove_locks = {}
301 # Used to force good behavior when calling helper functions
302 self.recalculate_locks = {}
305 self.Log = processor.Log # pylint: disable-msg=C0103
306 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
307 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
308 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
309 # support for dry-run
310 self.dry_run_result = None
311 # support for generic debug attribute
312 if (not hasattr(self.op, "debug_level") or
313 not isinstance(self.op.debug_level, int)):
314 self.op.debug_level = 0
319 # The new kind-of-type-system
320 op_id = self.op.OP_ID
321 for attr_name, aval, test in self._OP_PARAMS:
322 if not hasattr(op, attr_name):
323 if aval == _NoDefault:
324 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
325 (op_id, attr_name), errors.ECODE_INVAL)
331 setattr(self.op, attr_name, dval)
332 attr_val = getattr(op, attr_name)
336 if not callable(test):
337 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
338 " given type is not a proper type (%s)" %
339 (op_id, attr_name, test))
340 if not test(attr_val):
341 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
342 self.op.OP_ID, attr_name, type(attr_val), attr_val)
343 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
344 (op_id, attr_name), errors.ECODE_INVAL)
346 self.CheckArguments()
349 """Returns the SshRunner object
353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
356 ssh = property(fget=__GetSSH)
358 def CheckArguments(self):
359 """Check syntactic validity for the opcode arguments.
361 This method is for doing a simple syntactic check and ensuring the
362 validity of opcode parameters, without any cluster-related
363 checks. While the same can be accomplished in ExpandNames and/or
364 CheckPrereq, doing these separately is better because:
366 - ExpandNames is left as purely a lock-related function
367 - CheckPrereq is run after we have acquired locks (and possibly
370 The function is allowed to change the self.op attribute so that
371 later methods no longer need to worry about missing parameters.
376 def ExpandNames(self):
377 """Expand names for this LU.
379 This method is called before starting to execute the opcode, and it should
380 update all the parameters of the opcode to their canonical form (e.g. a
381 short node name must be fully expanded after this method has successfully
382 completed). This way locking, hooks, logging, etc. can work correctly.
384 LUs which implement this method must also populate the self.needed_locks
385 member, as a dict with lock levels as keys, and a list of needed lock names
388 - use an empty dict if you don't need any lock
389 - if you don't need any lock at a particular level omit that level
390 - don't put anything for the BGL level
391 - if you want all locks at a level use locking.ALL_SET as a value
393 If you need to share locks (rather than acquire them exclusively) at one
394 level you can modify self.share_locks, setting a true value (usually 1) for
395 that level. By default locks are not shared.
397 This function can also define a list of tasklets, which then will be
398 executed in order instead of the usual LU-level CheckPrereq and Exec
399 functions, if those are not defined by the LU.
403 # Acquire all nodes and one instance
404 self.needed_locks = {
405 locking.LEVEL_NODE: locking.ALL_SET,
406 locking.LEVEL_INSTANCE: ['instance1.example.com'],
408 # Acquire just two nodes
409 self.needed_locks = {
410 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
413 self.needed_locks = {} # No, you can't leave it to the default value None
416 # The implementation of this method is mandatory only if the new LU is
417 # concurrent, so that old LUs don't need to be changed all at the same time.
420 self.needed_locks = {} # Exclusive LUs don't need locks.
422 raise NotImplementedError
424 def DeclareLocks(self, level):
425 """Declare LU locking needs for a level
427 While most LUs can just declare their locking needs at ExpandNames time,
428 sometimes there's the need to calculate some locks after having acquired
429 the ones before. This function is called just before acquiring locks at a
430 particular level, but after acquiring the ones at lower levels, and permits
431 such calculations. It can be used to modify self.needed_locks, and by
432 default it does nothing.
434 This function is only called if you have something already set in
435 self.needed_locks for the level.
437 @param level: Locking level which is going to be locked
438 @type level: member of ganeti.locking.LEVELS
442 def CheckPrereq(self):
443 """Check prerequisites for this LU.
445 This method should check that the prerequisites for the execution
446 of this LU are fulfilled. It can do internode communication, but
447 it should be idempotent - no cluster or system changes are allowed.
450 The method should raise errors.OpPrereqError in case something is
451 not fulfilled. Its return value is ignored.
453 This method should also update all the parameters of the opcode to
454 their canonical form if it hasn't been done by ExpandNames before.
457 if self.tasklets is not None:
458 for (idx, tl) in enumerate(self.tasklets):
459 logging.debug("Checking prerequisites for tasklet %s/%s",
460 idx + 1, len(self.tasklets))
465 def Exec(self, feedback_fn):
468 This method should implement the actual work. It should raise
469 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
473 if self.tasklets is not None:
474 for (idx, tl) in enumerate(self.tasklets):
475 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
478 raise NotImplementedError
480 def BuildHooksEnv(self):
481 """Build hooks environment for this LU.
483 This method should return a three-element tuple consisting of: a dict
484 containing the environment that will be used for running the
485 specific hook for this LU, a list of node names on which the hook
486 should run before the execution, and a list of node names on which
487 the hook should run after the execution.
489 The keys of the dict must not be prefixed with 'GANETI_' as this will
490 be handled in the hooks runner. Also note additional keys will be
491 added by the hooks runner. If the LU doesn't define any
492 environment, an empty dict (and not None) should be returned.
494 No nodes should be returned as an empty list (and not None).
496 Note that if the HPATH for a LU class is None, this function will not be called.
500 raise NotImplementedError
502 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503 """Notify the LU about the results of its hooks.
505 This method is called every time a hooks phase is executed, and notifies
506 the Logical Unit about the hooks' result. The LU can then use it to alter
507 its result based on the hooks. By default the method does nothing and the
508 previous result is passed back unchanged but any LU can define it if it
509 wants to use the local cluster hook-scripts somehow.
511 @param phase: one of L{constants.HOOKS_PHASE_POST} or
512 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
513 @param hook_results: the results of the multi-node hooks rpc call
514 @param feedback_fn: function used to send feedback back to the caller
515 @param lu_result: the previous Exec result this LU had, or None
517 @return: the new Exec result, based on the previous result
521 # API must be kept, thus we ignore the 'unused argument' and 'could
522 # be a function' warnings
523 # pylint: disable-msg=W0613,R0201
526 def _ExpandAndLockInstance(self):
527 """Helper function to expand and lock an instance.
529 Many LUs that work on an instance take its name in self.op.instance_name
530 and need to expand it and then declare the expanded name for locking. This
531 function does it, and then updates self.op.instance_name to the expanded
532 name. It also initializes needed_locks as a dict, if this hasn't been done before.
536 if self.needed_locks is None:
537 self.needed_locks = {}
539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540 "_ExpandAndLockInstance called with instance-level locks set"
541 self.op.instance_name = _ExpandInstanceName(self.cfg,
542 self.op.instance_name)
543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
545 def _LockInstancesNodes(self, primary_only=False):
546 """Helper function to declare instances' nodes for locking.
548 This function should be called after locking one or more instances to lock
549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550 with all primary or secondary nodes for instances already locked and
551 present in self.needed_locks[locking.LEVEL_INSTANCE].
553 It should be called from DeclareLocks, and for safety only works if
554 self.recalculate_locks[locking.LEVEL_NODE] is set.
556 In the future it may grow parameters to just lock some instance's nodes, or
557 to just lock primary or secondary nodes, if needed.
559 It should be called in DeclareLocks in a way similar to::
561 if level == locking.LEVEL_NODE:
562 self._LockInstancesNodes()
564 @type primary_only: boolean
565 @param primary_only: only lock primary nodes of locked instances
568 assert locking.LEVEL_NODE in self.recalculate_locks, \
569 "_LockInstancesNodes helper function called with no nodes to recalculate"
571 # TODO: check if we really have been called with the instance locks held
573 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
574 # future we might want to have different behaviors depending on the value
575 # of self.recalculate_locks[locking.LEVEL_NODE]
577 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578 instance = self.context.cfg.GetInstanceInfo(instance_name)
579 wanted_nodes.append(instance.primary_node)
581 wanted_nodes.extend(instance.secondary_nodes)
583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
588 del self.recalculate_locks[locking.LEVEL_NODE]
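# Illustrative sketch, not part of the original module: the typical way an LU
# combines _ExpandAndLockInstance, recalculate_locks and _LockInstancesNodes;
# the method bodies shown here are abbreviated and hypothetical.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes(primary_only=True)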
591 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592 """Simple LU which runs no hooks.
594 This LU is intended as a parent for other LogicalUnits which will
595 run no hooks, in order to reduce duplicate code.
601 def BuildHooksEnv(self):
602 """Empty BuildHooksEnv for NoHooksLu.
604 This just raises an error.
607 assert False, "BuildHooksEnv called for NoHooksLUs"
611 """Tasklet base class.
613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614 they can mix legacy code with tasklets. Locking needs to be done in the LU,
615 tasklets know nothing about locks.
617 Subclasses must follow these rules:
618 - Implement CheckPrereq
622 def __init__(self, lu):
629 def CheckPrereq(self):
630 """Check prerequisites for this tasklets.
632 This method should check whether the prerequisites for the execution of
633 this tasklet are fulfilled. It can do internode communication, but it
634 should be idempotent - no cluster or system changes are allowed.
636 The method should raise errors.OpPrereqError in case something is not
637 fulfilled. Its return value is ignored.
639 This method should also update all parameters to their canonical form if it
640 hasn't been done before.
645 def Exec(self, feedback_fn):
646 """Execute the tasklet.
648 This method should implement the actual work. It should raise
649 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
653 raise NotImplementedError
656 def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names.
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
662 @param nodes: non-empty list of node names to be expanded
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
669 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
670 " non-empty list of nodes whose name is to be expanded.")
672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673 return utils.NiceSort(wanted)
676 def _GetWantedInstances(lu, instances):
677 """Returns list of checked and expanded instance names.
679 @type lu: L{LogicalUnit}
680 @param lu: the logical unit on whose behalf we execute
681 @type instances: list
682 @param instances: list of instance names or None for all instances
684 @return: the list of instances, sorted
685 @raise errors.OpPrereqError: if the instances parameter is wrong type
686 @raise errors.OpPrereqError: if any of the passed instances is not found
690 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
692 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
696 def _GetUpdatedParams(old_params, update_dict,
697 use_default=True, use_none=False):
698 """Return the new version of a parameter dictionary.
700 @type old_params: dict
701 @param old_params: old parameters
702 @type update_dict: dict
703 @param update_dict: dict containing new parameter values, or
704 constants.VALUE_DEFAULT to reset the parameter to its default
706 @type use_default: boolean
707 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
708 values as 'to be deleted' values
709 @type use_none: boolean
710 @param use_none: whether to recognise C{None} values as 'to be deleted' values
713 @return: the new parameter dictionary
716 params_copy = copy.deepcopy(old_params)
717 for key, val in update_dict.iteritems():
718 if ((use_default and val == constants.VALUE_DEFAULT) or
719 (use_none and val is None)):
725 params_copy[key] = val
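# Illustrative examples, not part of the original module: expected behaviour
# of _GetUpdatedParams with the flags documented above.
#
#   old = {"memory": 128, "vcpus": 2}
#   _GetUpdatedParams(old, {"memory": constants.VALUE_DEFAULT, "vcpus": 4})
#   # -> {"vcpus": 4}    ("memory" reverts to its default, i.e. is removed)
#   _GetUpdatedParams(old, {"memory": None}, use_none=True)
#   # -> {"vcpus": 2}    (None deletes the key when use_none is set)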
729 def _CheckOutputFields(static, dynamic, selected):
730 """Checks whether all selected fields are valid.
732 @type static: L{utils.FieldSet}
733 @param static: static fields set
734 @type dynamic: L{utils.FieldSet}
735 @param dynamic: dynamic fields set
742 delta = f.NonMatching(selected)
744 raise errors.OpPrereqError("Unknown output fields selected: %s"
745 % ",".join(delta), errors.ECODE_INVAL)
748 def _CheckGlobalHvParams(params):
749 """Validates that given hypervisor params are not global ones.
751 This will ensure that instances don't get customised versions of global parameters.
755 used_globals = constants.HVC_GLOBALS.intersection(params)
757 msg = ("The following hypervisor parameters are global and cannot"
758 " be customized at instance level, please modify them at"
759 " cluster level: %s" % utils.CommaJoin(used_globals))
760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
763 def _CheckNodeOnline(lu, node):
764 """Ensure that a given node is online.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @raise errors.OpPrereqError: if the node is offline
771 if lu.cfg.GetNodeInfo(node).offline:
772 raise errors.OpPrereqError("Can't use offline node %s" % node,
776 def _CheckNodeNotDrained(lu, node):
777 """Ensure that a given node is not drained.
779 @param lu: the LU on behalf of which we make the check
780 @param node: the node to check
781 @raise errors.OpPrereqError: if the node is drained
784 if lu.cfg.GetNodeInfo(node).drained:
785 raise errors.OpPrereqError("Can't use drained node %s" % node,
789 def _CheckNodeHasOS(lu, node, os_name, force_variant):
790 """Ensure that a node supports a given OS.
792 @param lu: the LU on behalf of which we make the check
793 @param node: the node to check
794 @param os_name: the OS to query about
795 @param force_variant: whether to ignore variant errors
796 @raise errors.OpPrereqError: if the node is not supporting the OS
799 result = lu.rpc.call_os_get(node, os_name)
800 result.Raise("OS '%s' not in supported OS list for node %s" %
802 prereq=True, ecode=errors.ECODE_INVAL)
803 if not force_variant:
804 _CheckOSVariant(result.payload, os_name)
807 def _RequireFileStorage():
808 """Checks that file storage is enabled.
810 @raise errors.OpPrereqError: when file storage is disabled
813 if not constants.ENABLE_FILE_STORAGE:
814 raise errors.OpPrereqError("File storage disabled at configure time",
818 def _CheckDiskTemplate(template):
819 """Ensure a given disk template is valid.
822 if template not in constants.DISK_TEMPLATES:
823 msg = ("Invalid disk template name '%s', valid templates are: %s" %
824 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
826 if template == constants.DT_FILE:
827 _RequireFileStorage()
831 def _CheckStorageType(storage_type):
832 """Ensure a given storage type is valid.
835 if storage_type not in constants.VALID_STORAGE_TYPES:
836 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
838 if storage_type == constants.ST_FILE:
839 _RequireFileStorage()
843 def _GetClusterDomainSecret():
844 """Reads the cluster domain secret.
847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
851 def _CheckInstanceDown(lu, instance, reason):
852 """Ensure that an instance is not running."""
853 if instance.admin_up:
854 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
855 (instance.name, reason), errors.ECODE_STATE)
857 pnode = instance.primary_node
858 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
859 ins_l.Raise("Can't contact node %s for instance information" % pnode,
860 prereq=True, ecode=errors.ECODE_ENVIRON)
862 if instance.name in ins_l.payload:
863 raise errors.OpPrereqError("Instance %s is running, %s" %
864 (instance.name, reason), errors.ECODE_STATE)
867 def _ExpandItemName(fn, name, kind):
868 """Expand an item name.
870 @param fn: the function to use for expansion
871 @param name: requested item name
872 @param kind: text description ('Node' or 'Instance')
873 @return: the resolved (full) name
874 @raise errors.OpPrereqError: if the item is not found
878 if full_name is None:
879 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
884 def _ExpandNodeName(cfg, name):
885 """Wrapper over L{_ExpandItemName} for nodes."""
886 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
889 def _ExpandInstanceName(cfg, name):
890 """Wrapper over L{_ExpandItemName} for instance."""
891 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
894 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
895 memory, vcpus, nics, disk_template, disks,
896 bep, hvp, hypervisor_name):
897 """Builds instance related env variables for hooks
899 This builds the hook environment from individual variables.
902 @param name: the name of the instance
903 @type primary_node: string
904 @param primary_node: the name of the instance's primary node
905 @type secondary_nodes: list
906 @param secondary_nodes: list of secondary nodes as strings
907 @type os_type: string
908 @param os_type: the name of the instance's OS
909 @type status: boolean
910 @param status: the should_run status of the instance
912 @param memory: the memory size of the instance
914 @param vcpus: the count of VCPUs the instance has
916 @param nics: list of tuples (ip, mac, mode, link) representing
917 the NICs the instance has
918 @type disk_template: string
919 @param disk_template: the disk template of the instance
921 @param disks: the list of (size, mode) pairs
923 @param bep: the backend parameters for the instance
925 @param hvp: the hypervisor parameters for the instance
926 @type hypervisor_name: string
927 @param hypervisor_name: the hypervisor for the instance
929 @return: the hook environment for this instance
938 "INSTANCE_NAME": name,
939 "INSTANCE_PRIMARY": primary_node,
940 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
941 "INSTANCE_OS_TYPE": os_type,
942 "INSTANCE_STATUS": str_status,
943 "INSTANCE_MEMORY": memory,
944 "INSTANCE_VCPUS": vcpus,
945 "INSTANCE_DISK_TEMPLATE": disk_template,
946 "INSTANCE_HYPERVISOR": hypervisor_name,
950 nic_count = len(nics)
951 for idx, (ip, mac, mode, link) in enumerate(nics):
954 env["INSTANCE_NIC%d_IP" % idx] = ip
955 env["INSTANCE_NIC%d_MAC" % idx] = mac
956 env["INSTANCE_NIC%d_MODE" % idx] = mode
957 env["INSTANCE_NIC%d_LINK" % idx] = link
958 if mode == constants.NIC_MODE_BRIDGED:
959 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
963 env["INSTANCE_NIC_COUNT"] = nic_count
966 disk_count = len(disks)
967 for idx, (size, mode) in enumerate(disks):
968 env["INSTANCE_DISK%d_SIZE" % idx] = size
969 env["INSTANCE_DISK%d_MODE" % idx] = mode
973 env["INSTANCE_DISK_COUNT"] = disk_count
975 for source, kind in [(bep, "BE"), (hvp, "HV")]:
976 for key, value in source.items():
977 env["INSTANCE_%s_%s" % (kind, key)] = value
982 def _NICListToTuple(lu, nics):
983 """Build a list of nic information tuples.
985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986 value in LUQueryInstanceData.
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type nics: list of L{objects.NIC}
991 @param nics: list of nics to convert to hooks tuples
995 cluster = lu.cfg.GetClusterInfo()
999 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000 mode = filled_params[constants.NIC_MODE]
1001 link = filled_params[constants.NIC_LINK]
1002 hooks_nics.append((ip, mac, mode, link))
1006 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007 """Builds instance related env variables for hooks from an object.
1009 @type lu: L{LogicalUnit}
1010 @param lu: the logical unit on whose behalf we execute
1011 @type instance: L{objects.Instance}
1012 @param instance: the instance for which we should build the
1014 @type override: dict
1015 @param override: dictionary with key/values that will override
1018 @return: the hook environment dictionary
1021 cluster = lu.cfg.GetClusterInfo()
1022 bep = cluster.FillBE(instance)
1023 hvp = cluster.FillHV(instance)
1025 'name': instance.name,
1026 'primary_node': instance.primary_node,
1027 'secondary_nodes': instance.secondary_nodes,
1028 'os_type': instance.os,
1029 'status': instance.admin_up,
1030 'memory': bep[constants.BE_MEMORY],
1031 'vcpus': bep[constants.BE_VCPUS],
1032 'nics': _NICListToTuple(lu, instance.nics),
1033 'disk_template': instance.disk_template,
1034 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1037 'hypervisor_name': instance.hypervisor,
1040 args.update(override)
1041 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1044 def _AdjustCandidatePool(lu, exceptions):
1045 """Adjust the candidate pool after node operations.
1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1050 lu.LogInfo("Promoted nodes to master candidate role: %s",
1051 utils.CommaJoin(node.name for node in mod_list))
1052 for name in mod_list:
1053 lu.context.ReaddNode(name)
1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1056 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1060 def _DecideSelfPromotion(lu, exceptions=None):
1061 """Decide whether I should promote myself as a master candidate.
1064 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066 # the new node will increase mc_max by one, so:
1067 mc_should = min(mc_should + 1, cp_size)
1068 return mc_now < mc_should
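# Illustrative example, not part of the original module: with
# candidate_pool_size = 10, mc_now = 3 and mc_should = 3, adding this node
# yields mc_should = min(3 + 1, 10) = 4 > mc_now, so the new node promotes
# itself to master candidate.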
1071 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072 """Check that the brigdes needed by a list of nics exist.
1075 cluster = lu.cfg.GetClusterInfo()
1076 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077 brlist = [params[constants.NIC_LINK] for params in paramslist
1078 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1080 result = lu.rpc.call_bridges_exist(target_node, brlist)
1081 result.Raise("Error checking bridges on destination node '%s'" %
1082 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1085 def _CheckInstanceBridgesExist(lu, instance, node=None):
1086 """Check that the brigdes needed by an instance exist.
1090 node = instance.primary_node
1091 _CheckNicsBridgesExist(lu, instance.nics, node)
1094 def _CheckOSVariant(os_obj, name):
1095 """Check whether an OS name conforms to the os variants specification.
1097 @type os_obj: L{objects.OS}
1098 @param os_obj: OS object to check
1100 @param name: OS name passed by the user, to check for validity
1103 if not os_obj.supported_variants:
1105 variant = objects.OS.GetVariant(name)
1107 raise errors.OpPrereqError("OS name must include a variant",
1110 if variant not in os_obj.supported_variants:
1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1114 def _GetNodeInstancesInner(cfg, fn):
1115 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1118 def _GetNodeInstances(cfg, node_name):
1119 """Returns a list of all primary and secondary instances on a node.
1123 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1126 def _GetNodePrimaryInstances(cfg, node_name):
1127 """Returns primary instances on a node.
1130 return _GetNodeInstancesInner(cfg,
1131 lambda inst: node_name == inst.primary_node)
1134 def _GetNodeSecondaryInstances(cfg, node_name):
1135 """Returns secondary instances on a node.
1138 return _GetNodeInstancesInner(cfg,
1139 lambda inst: node_name in inst.secondary_nodes)
1142 def _GetStorageTypeArgs(cfg, storage_type):
1143 """Returns the arguments for a storage type.
1146 # Special case for file storage
1147 if storage_type == constants.ST_FILE:
1148 # storage.FileStorage wants a list of storage directories
1149 return [[cfg.GetFileStorageDir()]]
1154 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1157 for dev in instance.disks:
1158 cfg.SetDiskID(dev, node_name)
1160 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161 result.Raise("Failed to get disk status from node %s" % node_name,
1162 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164 for idx, bdev_status in enumerate(result.payload):
1165 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1171 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172 """Check the sanity of iallocator and node arguments and use the
1173 cluster-wide iallocator if appropriate.
1175 Check that at most one of (iallocator, node) is specified. If none is
1176 specified, then the LU's opcode's iallocator slot is filled with the
1177 cluster-wide default iallocator.
1179 @type iallocator_slot: string
1180 @param iallocator_slot: the name of the opcode iallocator slot
1181 @type node_slot: string
1182 @param node_slot: the name of the opcode target node slot
1185 node = getattr(lu.op, node_slot, None)
1186 iallocator = getattr(lu.op, iallocator_slot, None)
1188 if node is not None and iallocator is not None:
1189 raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1191 elif node is None and iallocator is None:
1192 default_iallocator = lu.cfg.GetDefaultIAllocator()
1193 if default_iallocator:
1194 setattr(lu.op, iallocator_slot, default_iallocator)
1196 raise errors.OpPrereqError("No iallocator or node given and no"
1197 " cluster-wide default iallocator found."
1198 " Please specify either an iallocator or a"
1199 " node, or set a cluster-wide default"
1203 class LUPostInitCluster(LogicalUnit):
1204 """Logical unit for running hooks after cluster initialization.
1207 HPATH = "cluster-init"
1208 HTYPE = constants.HTYPE_CLUSTER
1210 def BuildHooksEnv(self):
1214 env = {"OP_TARGET": self.cfg.GetClusterName()}
1215 mn = self.cfg.GetMasterNode()
1216 return env, [], [mn]
1218 def Exec(self, feedback_fn):
1225 class LUDestroyCluster(LogicalUnit):
1226 """Logical unit for destroying the cluster.
1229 HPATH = "cluster-destroy"
1230 HTYPE = constants.HTYPE_CLUSTER
1232 def BuildHooksEnv(self):
1236 env = {"OP_TARGET": self.cfg.GetClusterName()}
1239 def CheckPrereq(self):
1240 """Check prerequisites.
1242 This checks whether the cluster is empty.
1244 Any errors are signaled by raising errors.OpPrereqError.
1247 master = self.cfg.GetMasterNode()
1249 nodelist = self.cfg.GetNodeList()
1250 if len(nodelist) != 1 or nodelist[0] != master:
1251 raise errors.OpPrereqError("There are still %d node(s) in"
1252 " this cluster." % (len(nodelist) - 1),
1254 instancelist = self.cfg.GetInstanceList()
1256 raise errors.OpPrereqError("There are still %d instance(s) in"
1257 " this cluster." % len(instancelist),
1260 def Exec(self, feedback_fn):
1261 """Destroys the cluster.
1264 master = self.cfg.GetMasterNode()
1265 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1267 # Run post hooks on master node before it's removed
1268 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1270 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1272 # pylint: disable-msg=W0702
1273 self.LogWarning("Errors occurred running hooks on %s" % master)
1275 result = self.rpc.call_node_stop_master(master, False)
1276 result.Raise("Could not disable the master role")
1278 if modify_ssh_setup:
1279 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1280 utils.CreateBackup(priv_key)
1281 utils.CreateBackup(pub_key)
1286 def _VerifyCertificate(filename):
1287 """Verifies a certificate for LUVerifyCluster.
1289 @type filename: string
1290 @param filename: Path to PEM file
1294 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1295 utils.ReadFile(filename))
1296 except Exception, err: # pylint: disable-msg=W0703
1297 return (LUVerifyCluster.ETYPE_ERROR,
1298 "Failed to load X509 certificate %s: %s" % (filename, err))
1301 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1302 constants.SSL_CERT_EXPIRATION_ERROR)
1305 fnamemsg = "While verifying %s: %s" % (filename, msg)
1310 return (None, fnamemsg)
1311 elif errcode == utils.CERT_WARNING:
1312 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1313 elif errcode == utils.CERT_ERROR:
1314 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1316 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1319 class LUVerifyCluster(LogicalUnit):
1320 """Verifies the cluster status.
1323 HPATH = "cluster-verify"
1324 HTYPE = constants.HTYPE_CLUSTER
1326 ("skip_checks", _EmptyList,
1327 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1328 ("verbose", False, _TBool),
1329 ("error_codes", False, _TBool),
1330 ("debug_simulate_errors", False, _TBool),
1334 TCLUSTER = "cluster"
1336 TINSTANCE = "instance"
1338 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1339 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1340 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1341 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1342 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1343 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1345 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1346 ENODEDRBD = (TNODE, "ENODEDRBD")
1347 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1348 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1349 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1350 ENODEHV = (TNODE, "ENODEHV")
1351 ENODELVM = (TNODE, "ENODELVM")
1352 ENODEN1 = (TNODE, "ENODEN1")
1353 ENODENET = (TNODE, "ENODENET")
1354 ENODEOS = (TNODE, "ENODEOS")
1355 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1356 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1357 ENODERPC = (TNODE, "ENODERPC")
1358 ENODESSH = (TNODE, "ENODESSH")
1359 ENODEVERSION = (TNODE, "ENODEVERSION")
1360 ENODESETUP = (TNODE, "ENODESETUP")
1361 ENODETIME = (TNODE, "ENODETIME")
1363 ETYPE_FIELD = "code"
1364 ETYPE_ERROR = "ERROR"
1365 ETYPE_WARNING = "WARNING"
1367 class NodeImage(object):
1368 """A class representing the logical and physical status of a node.
1371 @ivar name: the node name to which this object refers
1372 @ivar volumes: a structure as returned from
1373 L{ganeti.backend.GetVolumeList} (runtime)
1374 @ivar instances: a list of running instances (runtime)
1375 @ivar pinst: list of configured primary instances (config)
1376 @ivar sinst: list of configured secondary instances (config)
1377 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1378 of this node (config)
1379 @ivar mfree: free memory, as reported by hypervisor (runtime)
1380 @ivar dfree: free disk, as reported by the node (runtime)
1381 @ivar offline: the offline status (config)
1382 @type rpc_fail: boolean
1383 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1384 not whether the individual keys were correct) (runtime)
1385 @type lvm_fail: boolean
1386 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1387 @type hyp_fail: boolean
1388 @ivar hyp_fail: whether the RPC call didn't return the instance list
1389 @type ghost: boolean
1390 @ivar ghost: whether this is a known node or not (config)
1391 @type os_fail: boolean
1392 @ivar os_fail: whether the RPC call didn't return valid OS data
1394 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1397 def __init__(self, offline=False, name=None):
1406 self.offline = offline
1407 self.rpc_fail = False
1408 self.lvm_fail = False
1409 self.hyp_fail = False
1411 self.os_fail = False
1414 def ExpandNames(self):
1415 self.needed_locks = {
1416 locking.LEVEL_NODE: locking.ALL_SET,
1417 locking.LEVEL_INSTANCE: locking.ALL_SET,
1419 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1421 def _Error(self, ecode, item, msg, *args, **kwargs):
1422 """Format an error message.
1424 Based on the opcode's error_codes parameter, either format a
1425 parseable error code, or a simpler error string.
1427 This must be called only from Exec and functions called from Exec.
1430 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1432 # first complete the msg
1435 # then format the whole message
1436 if self.op.error_codes:
1437 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1443 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1444 # and finally report it via the feedback_fn
1445 self._feedback_fn(" - %s" % msg)
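# Illustrative examples, not part of the original module, of the two message
# formats produced above (values are hypothetical):
#
#   with op.error_codes:     ERROR:ENODELVM:node:node1.example.com:<message>
#   without op.error_codes:  ERROR: node node1.example.com: <message>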
1447 def _ErrorIf(self, cond, *args, **kwargs):
1448 """Log an error message if the passed condition is True.
1451 cond = bool(cond) or self.op.debug_simulate_errors
1453 self._Error(*args, **kwargs)
1454 # do not mark the operation as failed for WARN cases only
1455 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1456 self.bad = self.bad or cond
1458 def _VerifyNode(self, ninfo, nresult):
1459 """Perform some basic validation on data returned from a node.
1461 - check the result data structure is well formed and has all the
1463 - check ganeti version
1465 @type ninfo: L{objects.Node}
1466 @param ninfo: the node to check
1467 @param nresult: the results from the node
1469 @return: whether overall this call was successful (and we can expect
1470 reasonable values in the response)
1474 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1476 # main result, nresult should be a non-empty dict
1477 test = not nresult or not isinstance(nresult, dict)
1478 _ErrorIf(test, self.ENODERPC, node,
1479 "unable to verify node: no data returned")
1483 # compares ganeti version
1484 local_version = constants.PROTOCOL_VERSION
1485 remote_version = nresult.get("version", None)
1486 test = not (remote_version and
1487 isinstance(remote_version, (list, tuple)) and
1488 len(remote_version) == 2)
1489 _ErrorIf(test, self.ENODERPC, node,
1490 "connection to node returned invalid data")
1494 test = local_version != remote_version[0]
1495 _ErrorIf(test, self.ENODEVERSION, node,
1496 "incompatible protocol versions: master %s,"
1497 " node %s", local_version, remote_version[0])
1501 # node seems compatible, we can actually try to look into its results
1503 # full package version
1504 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1505 self.ENODEVERSION, node,
1506 "software version mismatch: master %s, node %s",
1507 constants.RELEASE_VERSION, remote_version[1],
1508 code=self.ETYPE_WARNING)
1510 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1511 if isinstance(hyp_result, dict):
1512 for hv_name, hv_result in hyp_result.iteritems():
1513 test = hv_result is not None
1514 _ErrorIf(test, self.ENODEHV, node,
1515 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1518 test = nresult.get(constants.NV_NODESETUP,
1519 ["Missing NODESETUP results"])
1520 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1525 def _VerifyNodeTime(self, ninfo, nresult,
1526 nvinfo_starttime, nvinfo_endtime):
1527 """Check the node time.
1529 @type ninfo: L{objects.Node}
1530 @param ninfo: the node to check
1531 @param nresult: the remote results for the node
1532 @param nvinfo_starttime: the start time of the RPC call
1533 @param nvinfo_endtime: the end time of the RPC call
1537 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1539 ntime = nresult.get(constants.NV_TIME, None)
1541 ntime_merged = utils.MergeTime(ntime)
1542 except (ValueError, TypeError):
1543 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1546 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1547 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1548 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1549 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1553 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1554 "Node time diverges by at least %s from master node time",
1557 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1558 """Check the node time.
1560 @type ninfo: L{objects.Node}
1561 @param ninfo: the node to check
1562 @param nresult: the remote results for the node
1563 @param vg_name: the configured VG name
1570 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1572 # checks vg existence and size > 20G
1573 vglist = nresult.get(constants.NV_VGLIST, None)
1575 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1577 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1578 constants.MIN_VG_SIZE)
1579 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1582 pvlist = nresult.get(constants.NV_PVLIST, None)
1583 test = pvlist is None
1584 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1586 # check that ':' is not present in PV names, since it's a
1587 # special character for lvcreate (denotes the range of PEs to be used on this PV)
1589 for _, pvname, owner_vg in pvlist:
1590 test = ":" in pvname
1591 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1592 " '%s' of VG '%s'", pvname, owner_vg)
1594 def _VerifyNodeNetwork(self, ninfo, nresult):
1595 """Check the node time.
1597 @type ninfo: L{objects.Node}
1598 @param ninfo: the node to check
1599 @param nresult: the remote results for the node
1603 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1605 test = constants.NV_NODELIST not in nresult
1606 _ErrorIf(test, self.ENODESSH, node,
1607 "node hasn't returned node ssh connectivity data")
1609 if nresult[constants.NV_NODELIST]:
1610 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1611 _ErrorIf(True, self.ENODESSH, node,
1612 "ssh communication with node '%s': %s", a_node, a_msg)
1614 test = constants.NV_NODENETTEST not in nresult
1615 _ErrorIf(test, self.ENODENET, node,
1616 "node hasn't returned node tcp connectivity data")
1618 if nresult[constants.NV_NODENETTEST]:
1619 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1621 _ErrorIf(True, self.ENODENET, node,
1622 "tcp communication with node '%s': %s",
1623 anode, nresult[constants.NV_NODENETTEST][anode])
1625 test = constants.NV_MASTERIP not in nresult
1626 _ErrorIf(test, self.ENODENET, node,
1627 "node hasn't returned node master IP reachability data")
1629 if not nresult[constants.NV_MASTERIP]:
1630 if node == self.master_node:
1631 msg = "the master node cannot reach the master IP (not configured?)"
1633 msg = "cannot reach the master IP"
1634 _ErrorIf(True, self.ENODENET, node, msg)
1637 def _VerifyInstance(self, instance, instanceconfig, node_image):
1638 """Verify an instance.
1640 This function checks to see if the required block devices are
1641 available on the instance's node.
1644 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1645 node_current = instanceconfig.primary_node
1647 node_vol_should = {}
1648 instanceconfig.MapLVsByNode(node_vol_should)
1650 for node in node_vol_should:
1651 n_img = node_image[node]
1652 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1653 # ignore missing volumes on offline or broken nodes
1655 for volume in node_vol_should[node]:
1656 test = volume not in n_img.volumes
1657 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1658 "volume %s missing on node %s", volume, node)
1660 if instanceconfig.admin_up:
1661 pri_img = node_image[node_current]
1662 test = instance not in pri_img.instances and not pri_img.offline
1663 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1664 "instance not running on its primary node %s",
1667 for node, n_img in node_image.items():
1668 if node != node_current:
1669 test = instance in n_img.instances
1670 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1671 "instance should not run on node %s", node)
1673 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1674 """Verify if there are any unknown volumes in the cluster.
1676 The .os, .swap and backup volumes are ignored. All other volumes are
1677 reported as unknown.
1679 @type reserved: L{ganeti.utils.FieldSet}
1680 @param reserved: a FieldSet of reserved volume names
1683 for node, n_img in node_image.items():
1684 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1685 # skip non-healthy nodes
1687 for volume in n_img.volumes:
1688 test = ((node not in node_vol_should or
1689 volume not in node_vol_should[node]) and
1690 not reserved.Matches(volume))
1691 self._ErrorIf(test, self.ENODEORPHANLV, node,
1692 "volume %s is unknown", volume)
1694 def _VerifyOrphanInstances(self, instancelist, node_image):
1695 """Verify the list of running instances.
1697 This checks what instances are running but unknown to the cluster.
1700 for node, n_img in node_image.items():
1701 for o_inst in n_img.instances:
1702 test = o_inst not in instancelist
1703 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1704 "instance %s on node %s should not exist", o_inst, node)
1706 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1707 """Verify N+1 Memory Resilience.
1709 Check that if one single node dies we can still start all the
1710 instances it was primary for.
1713 for node, n_img in node_image.items():
1714 # This code checks that every node which is now listed as
1715 # secondary has enough memory to host all instances it is
1716 # supposed to host, should a single other node in the cluster fail.
1717 # FIXME: not ready for failover to an arbitrary node
1718 # FIXME: does not support file-backed instances
1719 # WARNING: we currently take into account down instances as well
1720 # as up ones, considering that even if they're down someone
1721 # might want to start them even in the event of a node failure.
1722 for prinode, instances in n_img.sbp.items():
1724 for instance in instances:
1725 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1726 if bep[constants.BE_AUTO_BALANCE]:
1727 needed_mem += bep[constants.BE_MEMORY]
1728 test = n_img.mfree < needed_mem
1729 self._ErrorIf(test, self.ENODEN1, node,
1730 "not enough memory on to accommodate"
1731 " failovers should peer node %s fail", prinode)
1733 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1735 """Verifies and computes the node required file checksums.
1737 @type ninfo: L{objects.Node}
1738 @param ninfo: the node to check
1739 @param nresult: the remote results for the node
1740 @param file_list: required list of files
1741 @param local_cksum: dictionary of local files and their checksums
1742 @param master_files: list of files that only masters should have
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1749 test = not isinstance(remote_cksum, dict)
1750 _ErrorIf(test, self.ENODEFILECHECK, node,
1751 "node hasn't returned file checksum data")
1755 for file_name in file_list:
1756 node_is_mc = ninfo.master_candidate
1757 must_have = (file_name not in master_files) or node_is_mc
1759 test1 = file_name not in remote_cksum
1761 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1763 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1764 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1765 "file '%s' missing", file_name)
1766 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1767 "file '%s' has wrong checksum", file_name)
1768 # not candidate and this is not a must-have file
1769 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1770 "file '%s' should not exist on non master"
1771 " candidates (and the file is outdated)", file_name)
1772 # all good, except non-master/non-must have combination
1773 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1774 "file '%s' should not exist"
1775 " on non master candidates", file_name)
1777 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1779 """Verifies and the node DRBD status.
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the remote results for the node
1784 @param instanceinfo: the dict of instances
1785 @param drbd_helper: the configured DRBD usermode helper
1786 @param drbd_map: the DRBD map as returned by
1787 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1791 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1794 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1795 test = (helper_result is None)
1796 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1797 "no drbd usermode helper returned")
1799 status, payload = helper_result
1801 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1802 "drbd usermode helper check unsuccessful: %s", payload)
1803 test = status and (payload != drbd_helper)
1804 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1805 "wrong drbd usermode helper: %s", payload)
1807 # compute the DRBD minors
1809 for minor, instance in drbd_map[node].items():
1810 test = instance not in instanceinfo
1811 _ErrorIf(test, self.ECLUSTERCFG, None,
1812 "ghost instance '%s' in temporary DRBD map", instance)
1813 # ghost instance should not be running, but otherwise we
1814 # don't give double warnings (both ghost instance and
1815 # unallocated minor in use)
1817 node_drbd[minor] = (instance, False)
1819 instance = instanceinfo[instance]
1820 node_drbd[minor] = (instance.name, instance.admin_up)
1822 # and now check them
1823 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1824 test = not isinstance(used_minors, (tuple, list))
1825 _ErrorIf(test, self.ENODEDRBD, node,
1826 "cannot parse drbd status file: %s", str(used_minors))
1828 # we cannot check drbd status
1831 for minor, (iname, must_exist) in node_drbd.items():
1832 test = minor not in used_minors and must_exist
1833 _ErrorIf(test, self.ENODEDRBD, node,
1834 "drbd minor %d of instance %s is not active", minor, iname)
1835 for minor in used_minors:
1836 test = minor not in node_drbd
1837 _ErrorIf(test, self.ENODEDRBD, node,
1838 "unallocated drbd minor %d is in use", minor)
1840 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1841 """Builds the node OS structures.
1843 @type ninfo: L{objects.Node}
1844 @param ninfo: the node to check
1845 @param nresult: the remote results for the node
1846 @param nimg: the node image object
1850 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1852 remote_os = nresult.get(constants.NV_OSLIST, None)
1853 test = (not isinstance(remote_os, list) or
1854 not compat.all(isinstance(v, list) and len(v) == 7
1855 for v in remote_os))
1857 _ErrorIf(test, self.ENODEOS, node,
1858 "node hasn't returned valid OS data")
1867 for (name, os_path, status, diagnose,
1868 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1870 if name not in os_dict:
1873 # parameters is a list of lists instead of list of tuples due to
1874 # JSON lacking a real tuple type, fix it:
1875 parameters = [tuple(v) for v in parameters]
1876 os_dict[name].append((os_path, status, diagnose,
1877 set(variants), set(parameters), set(api_ver)))
1879 nimg.oslist = os_dict
1881 def _VerifyNodeOS(self, ninfo, nimg, base):
1882 """Verifies the node OS list.
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nimg: the node image object
1887 @param base: the 'template' node we match against (e.g. from the master)
1891 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1893 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1895 for os_name, os_data in nimg.oslist.items():
1896 assert os_data, "Empty OS status for OS %s?!" % os_name
1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898 _ErrorIf(not f_status, self.ENODEOS, node,
1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901 "OS '%s' has multiple entries (first one shadows the rest): %s",
1902 os_name, utils.CommaJoin([v[0] for v in os_data]))
1903 # this will be caught in the backend too
1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905 and not f_var, self.ENODEOS, node,
1906 "OS %s with API at least %d does not declare any variant",
1907 os_name, constants.OS_API_V15)
1908 # comparisons with the 'base' image
1909 test = os_name not in base.oslist
1910 _ErrorIf(test, self.ENODEOS, node,
1911 "Extra OS %s not present on reference node (%s)",
1915 assert base.oslist[os_name], "Base node has empty OS status?"
1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1918 # base OS is invalid, skipping
1920 for kind, a, b in [("API version", f_api, b_api),
1921 ("variants list", f_var, b_var),
1922 ("parameters", f_param, b_param)]:
1923 _ErrorIf(a != b, self.ENODEOS, node,
1924 "OS %s %s differs from reference node %s: %s vs. %s",
1925 kind, os_name, base.name,
1926 utils.CommaJoin(a), utils.CommaJoin(b))
1928 # check any missing OSes
1929 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1930 _ErrorIf(missing, self.ENODEOS, node,
1931 "OSes present on reference node %s but missing on this node: %s",
1932 base.name, utils.CommaJoin(missing))
1934 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1935 """Verifies and updates the node volume data.
1937 This function will update a L{NodeImage}'s internal structures
1938 with data from the remote call.
1940 @type ninfo: L{objects.Node}
1941 @param ninfo: the node to check
1942 @param nresult: the remote results for the node
1943 @param nimg: the node image object
1944 @param vg_name: the configured VG name
1948 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1950 nimg.lvm_fail = True
1951 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1954 elif isinstance(lvdata, basestring):
1955 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1956 utils.SafeEncode(lvdata))
1957 elif not isinstance(lvdata, dict):
1958 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1960 nimg.volumes = lvdata
1961 nimg.lvm_fail = False
1963 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1964 """Verifies and updates the node instance list.
1966 If the listing was successful, then updates this node's instance
1967 list. Otherwise, it marks the RPC call as failed for the instance list.
1970 @type ninfo: L{objects.Node}
1971 @param ninfo: the node to check
1972 @param nresult: the remote results for the node
1973 @param nimg: the node image object
1976 idata = nresult.get(constants.NV_INSTANCELIST, None)
1977 test = not isinstance(idata, list)
1978 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1979 " (instancelist): %s", utils.SafeEncode(str(idata)))
1981 nimg.hyp_fail = True
1983 nimg.instances = idata
1985 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1986 """Verifies and computes a node information map
1988 @type ninfo: L{objects.Node}
1989 @param ninfo: the node to check
1990 @param nresult: the remote results for the node
1991 @param nimg: the node image object
1992 @param vg_name: the configured VG name
1996 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1998 # try to read free memory (from the hypervisor)
1999 hv_info = nresult.get(constants.NV_HVINFO, None)
2000 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2001 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2004 nimg.mfree = int(hv_info["memory_free"])
2005 except (ValueError, TypeError):
2006 _ErrorIf(True, self.ENODERPC, node,
2007 "node returned invalid nodeinfo, check hypervisor")
2009 # FIXME: devise a free space model for file based instances as well
2010 if vg_name is not None:
2011 test = (constants.NV_VGLIST not in nresult or
2012 vg_name not in nresult[constants.NV_VGLIST])
2013 _ErrorIf(test, self.ENODELVM, node,
2014 "node didn't return data for the volume group '%s'"
2015 " - it is either missing or broken", vg_name)
2018 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2019 except (ValueError, TypeError):
2020 _ErrorIf(True, self.ENODERPC, node,
2021 "node returned invalid LVM info, check LVM status")
2023 def BuildHooksEnv(self):
2026 Cluster-Verify hooks are run only in the post phase; if they fail, their
2027 output is logged in the verify output and the verification fails.
2030 all_nodes = self.cfg.GetNodeList()
2032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2034 for node in self.cfg.GetAllNodesInfo().values():
2035 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2037 return env, [], all_nodes
2039 def Exec(self, feedback_fn):
2040 """Verify integrity of cluster, performing various test on nodes.
2044 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2045 verbose = self.op.verbose
2046 self._feedback_fn = feedback_fn
2047 feedback_fn("* Verifying global settings")
2048 for msg in self.cfg.VerifyConfig():
2049 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2051 # Check the cluster certificates
2052 for cert_filename in constants.ALL_CERT_FILES:
2053 (errcode, msg) = _VerifyCertificate(cert_filename)
2054 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2056 vg_name = self.cfg.GetVGName()
2057 drbd_helper = self.cfg.GetDRBDHelper()
2058 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2059 cluster = self.cfg.GetClusterInfo()
2060 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2061 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2062 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2063 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2064 for iname in instancelist)
2065 i_non_redundant = [] # Non redundant instances
2066 i_non_a_balanced = [] # Non auto-balanced instances
2067 n_offline = 0 # Count of offline nodes
2068 n_drained = 0 # Count of nodes being drained
2069 node_vol_should = {}
2071 # FIXME: verify OS list
2072 # do local checksums
2073 master_files = [constants.CLUSTER_CONF_FILE]
2074 master_node = self.master_node = self.cfg.GetMasterNode()
2075 master_ip = self.cfg.GetMasterIP()
2077 file_names = ssconf.SimpleStore().GetFileList()
2078 file_names.extend(constants.ALL_CERT_FILES)
2079 file_names.extend(master_files)
2080 if cluster.modify_etc_hosts:
2081 file_names.append(constants.ETC_HOSTS)
2083 local_checksums = utils.FingerprintFiles(file_names)
2085 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2086 node_verify_param = {
2087 constants.NV_FILELIST: file_names,
2088 constants.NV_NODELIST: [node.name for node in nodeinfo
2089 if not node.offline],
2090 constants.NV_HYPERVISOR: hypervisors,
2091 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2092 node.secondary_ip) for node in nodeinfo
2093 if not node.offline],
2094 constants.NV_INSTANCELIST: hypervisors,
2095 constants.NV_VERSION: None,
2096 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2097 constants.NV_NODESETUP: None,
2098 constants.NV_TIME: None,
2099 constants.NV_MASTERIP: (master_node, master_ip),
2100 constants.NV_OSLIST: None,
2103 if vg_name is not None:
2104 node_verify_param[constants.NV_VGLIST] = None
2105 node_verify_param[constants.NV_LVLIST] = vg_name
2106 node_verify_param[constants.NV_PVLIST] = [vg_name]
2107 node_verify_param[constants.NV_DRBDLIST] = None
2110 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
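# Illustrative sketch (not part of the original module): every key in
# node_verify_param is a constants.NV_* name and its value is whatever input
# that check needs on the remote node (None meaning "no extra input"). For a
# cluster whose volume group is "xenvg" (assumed name), the LVM entries added
# above amount to:
#
#   node_verify_param[constants.NV_LVLIST] = "xenvg"
#   node_verify_param[constants.NV_PVLIST] = ["xenvg"]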
2112 # Build our expected cluster state
2113 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2115 for node in nodeinfo)
2117 for instance in instancelist:
2118 inst_config = instanceinfo[instance]
2120 for nname in inst_config.all_nodes:
2121 if nname not in node_image:
2123 gnode = self.NodeImage(name=nname)
2125 node_image[nname] = gnode
2127 inst_config.MapLVsByNode(node_vol_should)
2129 pnode = inst_config.primary_node
2130 node_image[pnode].pinst.append(instance)
2132 for snode in inst_config.secondary_nodes:
2133 nimg = node_image[snode]
2134 nimg.sinst.append(instance)
2135 if pnode not in nimg.sbp:
2136 nimg.sbp[pnode] = []
2137 nimg.sbp[pnode].append(instance)
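# Illustrative sketch (assumption): after this loop each NodeImage records
# which instances depend on the node. For a DRBD instance "inst1" with
# primary "node1" and secondary "node2" (hypothetical names):
#
#   node_image["node1"].pinst == ["inst1"]
#   node_image["node2"].sinst == ["inst1"]
#   node_image["node2"].sbp   == {"node1": ["inst1"]}
#
# i.e. sbp maps each primary node to the instances for which this node acts
# as secondary.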
2139 # At this point, we have the in-memory data structures complete,
2140 # except for the runtime information, which we'll gather next
2142 # Due to the way our RPC system works, exact response times cannot be
2143 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2144 # time before and after executing the request, we can at least have a time window.
2146 nvinfo_starttime = time.time()
2147 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2148 self.cfg.GetClusterName())
2149 nvinfo_endtime = time.time()
2151 all_drbd_map = self.cfg.ComputeDRBDMap()
2153 feedback_fn("* Verifying node status")
2157 for node_i in nodeinfo:
2159 nimg = node_image[node]
2163 feedback_fn("* Skipping offline node %s" % (node,))
2167 if node == master_node:
2169 elif node_i.master_candidate:
2170 ntype = "master candidate"
2171 elif node_i.drained:
2177 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2179 msg = all_nvinfo[node].fail_msg
2180 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2182 nimg.rpc_fail = True
2185 nresult = all_nvinfo[node].payload
2187 nimg.call_ok = self._VerifyNode(node_i, nresult)
2188 self._VerifyNodeNetwork(node_i, nresult)
2189 self._VerifyNodeLVM(node_i, nresult, vg_name)
2190 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2192 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2196 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2197 self._UpdateNodeInstances(node_i, nresult, nimg)
2198 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2199 self._UpdateNodeOS(node_i, nresult, nimg)
2200 if not nimg.os_fail:
2201 if refos_img is None:
2203 self._VerifyNodeOS(node_i, nimg, refos_img)
2205 feedback_fn("* Verifying instance status")
2206 for instance in instancelist:
2208 feedback_fn("* Verifying instance %s" % instance)
2209 inst_config = instanceinfo[instance]
2210 self._VerifyInstance(instance, inst_config, node_image)
2211 inst_nodes_offline = []
2213 pnode = inst_config.primary_node
2214 pnode_img = node_image[pnode]
2215 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2216 self.ENODERPC, pnode, "instance %s, connection to"
2217 " primary node failed", instance)
2219 if pnode_img.offline:
2220 inst_nodes_offline.append(pnode)
2222 # If the instance is non-redundant we cannot survive losing its primary
2223 # node, so we are not N+1 compliant. On the other hand we have no disk
2224 # templates with more than one secondary so that situation is not well supported either.
2226 # FIXME: does not support file-backed instances
2227 if not inst_config.secondary_nodes:
2228 i_non_redundant.append(instance)
2229 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2230 instance, "instance has multiple secondary nodes: %s",
2231 utils.CommaJoin(inst_config.secondary_nodes),
2232 code=self.ETYPE_WARNING)
2234 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2235 i_non_a_balanced.append(instance)
2237 for snode in inst_config.secondary_nodes:
2238 s_img = node_image[snode]
2239 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2240 "instance %s, connection to secondary node failed", instance)
2243 inst_nodes_offline.append(snode)
2245 # warn that the instance lives on offline nodes
2246 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2247 "instance lives on offline node(s) %s",
2248 utils.CommaJoin(inst_nodes_offline))
2249 # ... or ghost nodes
2250 for node in inst_config.all_nodes:
2251 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2252 "instance lives on ghost node %s", node)
2254 feedback_fn("* Verifying orphan volumes")
2255 reserved = utils.FieldSet(*cluster.reserved_lvs)
2256 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2258 feedback_fn("* Verifying orphan instances")
2259 self._VerifyOrphanInstances(instancelist, node_image)
2261 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2262 feedback_fn("* Verifying N+1 Memory redundancy")
2263 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2265 feedback_fn("* Other Notes")
2267 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2268 % len(i_non_redundant))
2270 if i_non_a_balanced:
2271 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2272 % len(i_non_a_balanced))
2275 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2278 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2282 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2283 """Analyze the post-hooks' result
2285 This method analyses the hook result, handles it, and sends some
2286 nicely-formatted feedback back to the user.
2288 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2289 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2290 @param hooks_results: the results of the multi-node hooks rpc call
2291 @param feedback_fn: function used to send feedback back to the caller
2292 @param lu_result: previous Exec result
2293 @return: the new Exec result, based on the previous result
2297 # We only really run POST phase hooks, and are only interested in their results
2299 if phase == constants.HOOKS_PHASE_POST:
2300 # Used to change hooks' output to proper indentation
2301 indent_re = re.compile('^', re.M)
2302 feedback_fn("* Hooks Results")
2303 assert hooks_results, "invalid result from hooks"
2305 for node_name in hooks_results:
2306 res = hooks_results[node_name]
2308 test = msg and not res.offline
2309 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2310 "Communication failure in hooks execution: %s", msg)
2311 if res.offline or msg:
2312 # No need to investigate payload if node is offline or gave an error.
2313 # manually override lu_result here, as _ErrorIf only
2314 # overrides self.bad
2317 for script, hkr, output in res.payload:
2318 test = hkr == constants.HKR_FAIL
2319 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2320 "Script %s failed, output:", script)
2322 output = indent_re.sub(' ', output)
2323 feedback_fn("%s" % output)
2329 class LUVerifyDisks(NoHooksLU):
2330 """Verifies the cluster disks status.
2335 def ExpandNames(self):
2336 self.needed_locks = {
2337 locking.LEVEL_NODE: locking.ALL_SET,
2338 locking.LEVEL_INSTANCE: locking.ALL_SET,
2340 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2342 def Exec(self, feedback_fn):
2343 """Verify integrity of cluster disks.
2345 @rtype: tuple of three items
2346 @return: a tuple of (dict of node-to-node_error, list of instances
2347 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2351 result = res_nodes, res_instances, res_missing = {}, [], {}
2353 vg_name = self.cfg.GetVGName()
2354 nodes = utils.NiceSort(self.cfg.GetNodeList())
2355 instances = [self.cfg.GetInstanceInfo(name)
2356 for name in self.cfg.GetInstanceList()]
2359 for inst in instances:
2361 if (not inst.admin_up or
2362 inst.disk_template not in constants.DTS_NET_MIRROR):
2364 inst.MapLVsByNode(inst_lvs)
2365 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2366 for node, vol_list in inst_lvs.iteritems():
2367 for vol in vol_list:
2368 nv_dict[(node, vol)] = inst
2373 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2377 node_res = node_lvs[node]
2378 if node_res.offline:
2380 msg = node_res.fail_msg
2382 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2383 res_nodes[node] = msg
2386 lvs = node_res.payload
2387 for lv_name, (_, _, lv_online) in lvs.items():
2388 inst = nv_dict.pop((node, lv_name), None)
2389 if (not lv_online and inst is not None
2390 and inst.name not in res_instances):
2391 res_instances.append(inst.name)
2393 # any leftover items in nv_dict are missing LVs, let's arrange the data per instance
2395 for key, inst in nv_dict.iteritems():
2396 if inst.name not in res_missing:
2397 res_missing[inst.name] = []
2398 res_missing[inst.name].append(key)
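# Illustrative sketch (assumption): a possible value of the
# (res_nodes, res_instances, res_missing) tuple returned by this method,
# using hypothetical names:
#
#   ({"node3": "rpc error ..."},                       # nodes with LV errors
#    ["instance1"],                                    # need activate-disks
#    {"instance2": [("node2", "xenvg/disk0.data")]})   # missing (node, volume)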
2403 class LURepairDiskSizes(NoHooksLU):
2404 """Verifies the cluster disks sizes.
2407 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2410 def ExpandNames(self):
2411 if self.op.instances:
2412 self.wanted_names = []
2413 for name in self.op.instances:
2414 full_name = _ExpandInstanceName(self.cfg, name)
2415 self.wanted_names.append(full_name)
2416 self.needed_locks = {
2417 locking.LEVEL_NODE: [],
2418 locking.LEVEL_INSTANCE: self.wanted_names,
2420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2422 self.wanted_names = None
2423 self.needed_locks = {
2424 locking.LEVEL_NODE: locking.ALL_SET,
2425 locking.LEVEL_INSTANCE: locking.ALL_SET,
2427 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2429 def DeclareLocks(self, level):
2430 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2431 self._LockInstancesNodes(primary_only=True)
2433 def CheckPrereq(self):
2434 """Check prerequisites.
2436 This only checks the optional instance list against the existing names.
2439 if self.wanted_names is None:
2440 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2442 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2443 in self.wanted_names]
2445 def _EnsureChildSizes(self, disk):
2446 """Ensure children of the disk have the needed disk size.
2448 This is valid mainly for DRBD8 and fixes an issue where the
2449 children have smaller disk size.
2451 @param disk: an L{ganeti.objects.Disk} object
2454 if disk.dev_type == constants.LD_DRBD8:
2455 assert disk.children, "Empty children for DRBD8?"
2456 fchild = disk.children[0]
2457 mismatch = fchild.size < disk.size
2459 self.LogInfo("Child disk has size %d, parent %d, fixing",
2460 fchild.size, disk.size)
2461 fchild.size = disk.size
2463 # and we recurse on this child only, not on the metadev
2464 return self._EnsureChildSizes(fchild) or mismatch
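# Illustrative sketch (not part of the original module): for a DRBD8 disk
# whose data child was created smaller than the parent, the helper above
# grows the child in place and reports that something changed, e.g.
#
#   drbd.size == 10240 and drbd.children[0].size == 10236   # hypothetical sizes
#   self._EnsureChildSizes(drbd)   # -> True, child size is now 10240
#
# (non-DRBD8 disks are expected to report no change)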
2468 def Exec(self, feedback_fn):
2469 """Verify the size of cluster disks.
2472 # TODO: check child disks too
2473 # TODO: check differences in size between primary/secondary nodes
2475 for instance in self.wanted_instances:
2476 pnode = instance.primary_node
2477 if pnode not in per_node_disks:
2478 per_node_disks[pnode] = []
2479 for idx, disk in enumerate(instance.disks):
2480 per_node_disks[pnode].append((instance, idx, disk))
2483 for node, dskl in per_node_disks.items():
2484 newl = [v[2].Copy() for v in dskl]
2486 self.cfg.SetDiskID(dsk, node)
2487 result = self.rpc.call_blockdev_getsizes(node, newl)
2489 self.LogWarning("Failure in blockdev_getsizes call to node"
2490 " %s, ignoring", node)
2492 if len(result.data) != len(dskl):
2493 self.LogWarning("Invalid result from node %s, ignoring node results",
2496 for ((instance, idx, disk), size) in zip(dskl, result.data):
2498 self.LogWarning("Disk %d of instance %s did not return size"
2499 " information, ignoring", idx, instance.name)
2501 if not isinstance(size, (int, long)):
2502 self.LogWarning("Disk %d of instance %s did not return valid"
2503 " size information, ignoring", idx, instance.name)
2506 if size != disk.size:
2507 self.LogInfo("Disk %d of instance %s has mismatched size,"
2508 " correcting: recorded %d, actual %d", idx,
2509 instance.name, disk.size, size)
2511 self.cfg.Update(instance, feedback_fn)
2512 changed.append((instance.name, idx, size))
2513 if self._EnsureChildSizes(disk):
2514 self.cfg.Update(instance, feedback_fn)
2515 changed.append((instance.name, idx, disk.size))
2519 class LURenameCluster(LogicalUnit):
2520 """Rename the cluster.
2523 HPATH = "cluster-rename"
2524 HTYPE = constants.HTYPE_CLUSTER
2525 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2527 def BuildHooksEnv(self):
2532 "OP_TARGET": self.cfg.GetClusterName(),
2533 "NEW_NAME": self.op.name,
2535 mn = self.cfg.GetMasterNode()
2536 all_nodes = self.cfg.GetNodeList()
2537 return env, [mn], all_nodes
2539 def CheckPrereq(self):
2540 """Verify that the passed name is a valid one.
2543 hostname = netutils.GetHostInfo(self.op.name)
2545 new_name = hostname.name
2546 self.ip = new_ip = hostname.ip
2547 old_name = self.cfg.GetClusterName()
2548 old_ip = self.cfg.GetMasterIP()
2549 if new_name == old_name and new_ip == old_ip:
2550 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2551 " cluster has changed",
2553 if new_ip != old_ip:
2554 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2555 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2556 " reachable on the network. Aborting." %
2557 new_ip, errors.ECODE_NOTUNIQUE)
2559 self.op.name = new_name
2561 def Exec(self, feedback_fn):
2562 """Rename the cluster.
2565 clustername = self.op.name
2568 # shutdown the master IP
2569 master = self.cfg.GetMasterNode()
2570 result = self.rpc.call_node_stop_master(master, False)
2571 result.Raise("Could not disable the master role")
2574 cluster = self.cfg.GetClusterInfo()
2575 cluster.cluster_name = clustername
2576 cluster.master_ip = ip
2577 self.cfg.Update(cluster, feedback_fn)
2579 # update the known hosts file
2580 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2581 node_list = self.cfg.GetNodeList()
2583 node_list.remove(master)
2586 result = self.rpc.call_upload_file(node_list,
2587 constants.SSH_KNOWN_HOSTS_FILE)
2588 for to_node, to_result in result.iteritems():
2589 msg = to_result.fail_msg
2591 msg = ("Copy of file %s to node %s failed: %s" %
2592 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2593 self.proc.LogWarning(msg)
2596 result = self.rpc.call_node_start_master(master, False, False)
2597 msg = result.fail_msg
2599 self.LogWarning("Could not re-enable the master role on"
2600 " the master, please restart manually: %s", msg)
2605 class LUSetClusterParams(LogicalUnit):
2606 """Change the parameters of the cluster.
2609 HPATH = "cluster-modify"
2610 HTYPE = constants.HTYPE_CLUSTER
2612 ("vg_name", None, _TMaybeString),
2613 ("enabled_hypervisors", None,
2614 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2615 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2616 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2617 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2618 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2619 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2620 ("uid_pool", None, _NoType),
2621 ("add_uids", None, _NoType),
2622 ("remove_uids", None, _NoType),
2623 ("maintain_node_health", None, _TMaybeBool),
2624 ("nicparams", None, _TOr(_TDict, _TNone)),
2625 ("drbd_helper", None, _TOr(_TString, _TNone)),
2626 ("default_iallocator", None, _TMaybeString),
2627 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2628 ("hidden_os", None, _TOr(_TListOf(\
2631 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2633 ("blacklisted_os", None, _TOr(_TListOf(\
2636 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
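# Illustrative sketch (not part of the original module): the _T* helpers used
# throughout _OP_PARAMS are plain predicates that can be composed and called
# directly, e.g. for the ("reserved_lvs", ...) entry above:
#
#   check = _TOr(_TListOf(_TNonEmptyString), _TNone)
#   check(["xenvg/root"])   # -> True
#   check(None)             # -> True
#   check("xenvg/root")     # -> False, a bare string is not a list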
2641 def CheckArguments(self):
2645 if self.op.uid_pool:
2646 uidpool.CheckUidPool(self.op.uid_pool)
2648 if self.op.add_uids:
2649 uidpool.CheckUidPool(self.op.add_uids)
2651 if self.op.remove_uids:
2652 uidpool.CheckUidPool(self.op.remove_uids)
2654 def ExpandNames(self):
2655 # FIXME: in the future maybe other cluster params won't require checking on
2656 # all nodes to be modified.
2657 self.needed_locks = {
2658 locking.LEVEL_NODE: locking.ALL_SET,
2660 self.share_locks[locking.LEVEL_NODE] = 1
2662 def BuildHooksEnv(self):
2667 "OP_TARGET": self.cfg.GetClusterName(),
2668 "NEW_VG_NAME": self.op.vg_name,
2670 mn = self.cfg.GetMasterNode()
2671 return env, [mn], [mn]
2673 def CheckPrereq(self):
2674 """Check prerequisites.
2676 This checks that the given parameters do not conflict and that
2677 the given volume group is valid.
2680 if self.op.vg_name is not None and not self.op.vg_name:
2681 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2682 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2683 " instances exist", errors.ECODE_INVAL)
2685 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2686 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2687 raise errors.OpPrereqError("Cannot disable drbd helper while"
2688 " drbd-based instances exist",
2691 node_list = self.acquired_locks[locking.LEVEL_NODE]
2693 # if vg_name is not None, check the given volume group on all nodes
2695 vglist = self.rpc.call_vg_list(node_list)
2696 for node in node_list:
2697 msg = vglist[node].fail_msg
2699 # ignoring down node
2700 self.LogWarning("Error while gathering data on node %s"
2701 " (ignoring node): %s", node, msg)
2703 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2705 constants.MIN_VG_SIZE)
2707 raise errors.OpPrereqError("Error on node '%s': %s" %
2708 (node, vgstatus), errors.ECODE_ENVIRON)
2710 if self.op.drbd_helper:
2711 # checks given drbd helper on all nodes
2712 helpers = self.rpc.call_drbd_helper(node_list)
2713 for node in node_list:
2714 ninfo = self.cfg.GetNodeInfo(node)
2716 self.LogInfo("Not checking drbd helper on offline node %s", node)
2718 msg = helpers[node].fail_msg
2720 raise errors.OpPrereqError("Error checking drbd helper on node"
2721 " '%s': %s" % (node, msg),
2722 errors.ECODE_ENVIRON)
2723 node_helper = helpers[node].payload
2724 if node_helper != self.op.drbd_helper:
2725 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2726 (node, node_helper), errors.ECODE_ENVIRON)
2728 self.cluster = cluster = self.cfg.GetClusterInfo()
2729 # validate params changes
2730 if self.op.beparams:
2731 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2732 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2734 if self.op.nicparams:
2735 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2736 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2737 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2740 # check all instances for consistency
2741 for instance in self.cfg.GetAllInstancesInfo().values():
2742 for nic_idx, nic in enumerate(instance.nics):
2743 params_copy = copy.deepcopy(nic.nicparams)
2744 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2746 # check parameter syntax
2748 objects.NIC.CheckParameterSyntax(params_filled)
2749 except errors.ConfigurationError, err:
2750 nic_errors.append("Instance %s, nic/%d: %s" %
2751 (instance.name, nic_idx, err))
2753 # if we're moving instances to routed, check that they have an ip
2754 target_mode = params_filled[constants.NIC_MODE]
2755 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2756 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2757 (instance.name, nic_idx))
2759 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2760 "\n".join(nic_errors))
2762 # hypervisor list/parameters
2763 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2764 if self.op.hvparams:
2765 for hv_name, hv_dict in self.op.hvparams.items():
2766 if hv_name not in self.new_hvparams:
2767 self.new_hvparams[hv_name] = hv_dict
2769 self.new_hvparams[hv_name].update(hv_dict)
2771 # os hypervisor parameters
2772 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2774 for os_name, hvs in self.op.os_hvp.items():
2775 if os_name not in self.new_os_hvp:
2776 self.new_os_hvp[os_name] = hvs
2778 for hv_name, hv_dict in hvs.items():
2779 if hv_name not in self.new_os_hvp[os_name]:
2780 self.new_os_hvp[os_name][hv_name] = hv_dict
2782 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2785 self.new_osp = objects.FillDict(cluster.osparams, {})
2786 if self.op.osparams:
2787 for os_name, osp in self.op.osparams.items():
2788 if os_name not in self.new_osp:
2789 self.new_osp[os_name] = {}
2791 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2794 if not self.new_osp[os_name]:
2795 # we removed all parameters
2796 del self.new_osp[os_name]
2798 # check the parameter validity (remote check)
2799 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2800 os_name, self.new_osp[os_name])
2802 # changes to the hypervisor list
2803 if self.op.enabled_hypervisors is not None:
2804 self.hv_list = self.op.enabled_hypervisors
2805 for hv in self.hv_list:
2806 # if the hypervisor doesn't already exist in the cluster
2807 # hvparams, we initialize it to empty, and then (in both
2808 # cases) we make sure to fill the defaults, as we might not
2809 # have a complete defaults list if the hypervisor wasn't enabled before
2811 if hv not in new_hvp:
2813 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2814 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2816 self.hv_list = cluster.enabled_hypervisors
2818 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2819 # either the enabled list has changed, or the parameters have, validate
2820 for hv_name, hv_params in self.new_hvparams.items():
2821 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2822 (self.op.enabled_hypervisors and
2823 hv_name in self.op.enabled_hypervisors)):
2824 # either this is a new hypervisor, or its parameters have changed
2825 hv_class = hypervisor.GetHypervisor(hv_name)
2826 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827 hv_class.CheckParameterSyntax(hv_params)
2828 _CheckHVParams(self, node_list, hv_name, hv_params)
2831 # no need to check any newly-enabled hypervisors, since the
2832 # defaults have already been checked in the above code-block
2833 for os_name, os_hvp in self.new_os_hvp.items():
2834 for hv_name, hv_params in os_hvp.items():
2835 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2836 # we need to fill in the new os_hvp on top of the actual hv_p
2837 cluster_defaults = self.new_hvparams.get(hv_name, {})
2838 new_osp = objects.FillDict(cluster_defaults, hv_params)
2839 hv_class = hypervisor.GetHypervisor(hv_name)
2840 hv_class.CheckParameterSyntax(new_osp)
2841 _CheckHVParams(self, node_list, hv_name, new_osp)
2843 if self.op.default_iallocator:
2844 alloc_script = utils.FindFile(self.op.default_iallocator,
2845 constants.IALLOCATOR_SEARCH_PATH,
2847 if alloc_script is None:
2848 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2849 " specified" % self.op.default_iallocator,
2852 def Exec(self, feedback_fn):
2853 """Change the parameters of the cluster.
2856 if self.op.vg_name is not None:
2857 new_volume = self.op.vg_name
2860 if new_volume != self.cfg.GetVGName():
2861 self.cfg.SetVGName(new_volume)
2863 feedback_fn("Cluster LVM configuration already in desired"
2864 " state, not changing")
2865 if self.op.drbd_helper is not None:
2866 new_helper = self.op.drbd_helper
2869 if new_helper != self.cfg.GetDRBDHelper():
2870 self.cfg.SetDRBDHelper(new_helper)
2872 feedback_fn("Cluster DRBD helper already in desired state,"
2874 if self.op.hvparams:
2875 self.cluster.hvparams = self.new_hvparams
2877 self.cluster.os_hvp = self.new_os_hvp
2878 if self.op.enabled_hypervisors is not None:
2879 self.cluster.hvparams = self.new_hvparams
2880 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2881 if self.op.beparams:
2882 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2883 if self.op.nicparams:
2884 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2885 if self.op.osparams:
2886 self.cluster.osparams = self.new_osp
2888 if self.op.candidate_pool_size is not None:
2889 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2890 # we need to update the pool size here, otherwise the save will fail
2891 _AdjustCandidatePool(self, [])
2893 if self.op.maintain_node_health is not None:
2894 self.cluster.maintain_node_health = self.op.maintain_node_health
2896 if self.op.add_uids is not None:
2897 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2899 if self.op.remove_uids is not None:
2900 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2902 if self.op.uid_pool is not None:
2903 self.cluster.uid_pool = self.op.uid_pool
2905 if self.op.default_iallocator is not None:
2906 self.cluster.default_iallocator = self.op.default_iallocator
2908 if self.op.reserved_lvs is not None:
2909 self.cluster.reserved_lvs = self.op.reserved_lvs
2911 def helper_os(aname, mods, desc):
2913 lst = getattr(self.cluster, aname)
2914 for key, val in mods:
2915 if key == constants.DDM_ADD:
2917 feedback_fn("OS %s already in %s, ignoring", val, desc)
2920 elif key == constants.DDM_REMOVE:
2924 feedback_fn("OS %s not found in %s, ignoring", val, desc)
2926 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2928 if self.op.hidden_os:
2929 helper_os("hidden_os", self.op.hidden_os, "hidden")
2931 if self.op.blacklisted_os:
2932 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2934 self.cfg.Update(self.cluster, feedback_fn)
2937 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2938 """Distribute additional files which are part of the cluster configuration.
2940 ConfigWriter takes care of distributing the config and ssconf files, but
2941 there are more files which should be distributed to all nodes. This function
2942 makes sure those are copied.
2944 @param lu: calling logical unit
2945 @param additional_nodes: list of nodes not in the config to distribute to
2948 # 1. Gather target nodes
2949 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2950 dist_nodes = lu.cfg.GetOnlineNodeList()
2951 if additional_nodes is not None:
2952 dist_nodes.extend(additional_nodes)
2953 if myself.name in dist_nodes:
2954 dist_nodes.remove(myself.name)
2956 # 2. Gather files to distribute
2957 dist_files = set([constants.ETC_HOSTS,
2958 constants.SSH_KNOWN_HOSTS_FILE,
2959 constants.RAPI_CERT_FILE,
2960 constants.RAPI_USERS_FILE,
2961 constants.CONFD_HMAC_KEY,
2962 constants.CLUSTER_DOMAIN_SECRET_FILE,
2965 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2966 for hv_name in enabled_hypervisors:
2967 hv_class = hypervisor.GetHypervisor(hv_name)
2968 dist_files.update(hv_class.GetAncillaryFiles())
2970 # 3. Perform the files upload
2971 for fname in dist_files:
2972 if os.path.exists(fname):
2973 result = lu.rpc.call_upload_file(dist_nodes, fname)
2974 for to_node, to_result in result.items():
2975 msg = to_result.fail_msg
2977 msg = ("Copy of file %s to node %s failed: %s" %
2978 (fname, to_node, msg))
2979 lu.proc.LogWarning(msg)
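# Illustrative sketch: logical units call this helper after touching
# cluster-wide files, as done later in this module, e.g.
#
#   _RedistributeAncillaryFiles(self)                           # all online nodes
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])  # plus a node being added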
2982 class LURedistributeConfig(NoHooksLU):
2983 """Force the redistribution of cluster configuration.
2985 This is a very simple LU.
2990 def ExpandNames(self):
2991 self.needed_locks = {
2992 locking.LEVEL_NODE: locking.ALL_SET,
2994 self.share_locks[locking.LEVEL_NODE] = 1
2996 def Exec(self, feedback_fn):
2997 """Redistribute the configuration.
3000 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3001 _RedistributeAncillaryFiles(self)
3004 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3005 """Sleep and poll for an instance's disk to sync.
3008 if not instance.disks or disks is not None and not disks:
3011 disks = _ExpandCheckDisks(instance, disks)
3014 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3016 node = instance.primary_node
3019 lu.cfg.SetDiskID(dev, node)
3021 # TODO: Convert to utils.Retry
3024 degr_retries = 10 # in seconds, as we sleep 1 second each time
3028 cumul_degraded = False
3029 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3030 msg = rstats.fail_msg
3032 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3035 raise errors.RemoteError("Can't contact node %s for mirror data,"
3036 " aborting." % node)
3039 rstats = rstats.payload
3041 for i, mstat in enumerate(rstats):
3043 lu.LogWarning("Can't compute data for node %s/%s",
3044 node, disks[i].iv_name)
3047 cumul_degraded = (cumul_degraded or
3048 (mstat.is_degraded and mstat.sync_percent is None))
3049 if mstat.sync_percent is not None:
3051 if mstat.estimated_time is not None:
3052 rem_time = ("%s remaining (estimated)" %
3053 utils.FormatSeconds(mstat.estimated_time))
3054 max_time = mstat.estimated_time
3056 rem_time = "no time estimate"
3057 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3058 (disks[i].iv_name, mstat.sync_percent, rem_time))
3060 # if we're done but degraded, let's do a few small retries, to
3061 # make sure we see a stable and not transient situation; therefore
3062 # we force restart of the loop
3063 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3064 logging.info("Degraded disks found, %d retries left", degr_retries)
3072 time.sleep(min(60, max_time))
3075 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3076 return not cumul_degraded
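# Illustrative sketch (assumption): a typical caller treats the return value
# as "all disks clean", e.g.
#
#   disk_abort = not _WaitForSync(self, instance)   # hypothetical call site
#   if disk_abort:
#     ...  # handle the degraded disks
#
# since the helper only returns True when no disk was left degraded.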
3079 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3080 """Check that mirrors are not degraded.
3082 The ldisk parameter, if True, will change the test from the
3083 is_degraded attribute (which represents overall non-ok status for
3084 the device(s)) to the ldisk (representing the local storage status).
3087 lu.cfg.SetDiskID(dev, node)
3091 if on_primary or dev.AssembleOnSecondary():
3092 rstats = lu.rpc.call_blockdev_find(node, dev)
3093 msg = rstats.fail_msg
3095 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3097 elif not rstats.payload:
3098 lu.LogWarning("Can't find disk on node %s", node)
3102 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3104 result = result and not rstats.payload.is_degraded
3107 for child in dev.children:
3108 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3113 class LUDiagnoseOS(NoHooksLU):
3114 """Logical unit for OS diagnose/query.
3119 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3123 _BLK = "blacklisted"
3125 _FIELDS_STATIC = utils.FieldSet()
3126 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3127 "parameters", "api_versions", _HID, _BLK)
3129 def CheckArguments(self):
3131 raise errors.OpPrereqError("Selective OS query not supported",
3134 _CheckOutputFields(static=self._FIELDS_STATIC,
3135 dynamic=self._FIELDS_DYNAMIC,
3136 selected=self.op.output_fields)
3138 def ExpandNames(self):
3139 # Lock all nodes, in shared mode
3140 # Temporary removal of locks, should be reverted later
3141 # TODO: reintroduce locks when they are lighter-weight
3142 self.needed_locks = {}
3143 #self.share_locks[locking.LEVEL_NODE] = 1
3144 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3147 def _DiagnoseByOS(rlist):
3148 """Remaps a per-node return list into an a per-os per-node dictionary
3150 @param rlist: a map with node names as keys and OS objects as values
3153 @return: a dictionary with osnames as keys and as value another
3154 map, with nodes as keys and tuples of (path, status, diagnose,
3155 variants, parameters, api_versions) as values, eg::
3157 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3158 (/srv/..., False, "invalid api")],
3159 "node2": [(/srv/..., True, "", [], [])]}
3164 # we build here the list of nodes that didn't fail the RPC (at RPC
3165 # level), so that nodes with a non-responding node daemon don't
3166 # make all OSes invalid
3167 good_nodes = [node_name for node_name in rlist
3168 if not rlist[node_name].fail_msg]
3169 for node_name, nr in rlist.items():
3170 if nr.fail_msg or not nr.payload:
3172 for (name, path, status, diagnose, variants,
3173 params, api_versions) in nr.payload:
3174 if name not in all_os:
3175 # build a list of nodes for this os containing empty lists
3176 # for each node in node_list
3178 for nname in good_nodes:
3179 all_os[name][nname] = []
3180 # convert params from [name, help] to (name, help)
3181 params = [tuple(v) for v in params]
3182 all_os[name][node_name].append((path, status, diagnose,
3183 variants, params, api_versions))
3186 def Exec(self, feedback_fn):
3187 """Compute the list of OSes.
3190 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3191 node_data = self.rpc.call_os_diagnose(valid_nodes)
3192 pol = self._DiagnoseByOS(node_data)
3194 cluster = self.cfg.GetClusterInfo()
3196 for os_name in utils.NiceSort(pol.keys()):
3197 os_data = pol[os_name]
3200 (variants, params, api_versions) = null_state = (set(), set(), set())
3201 for idx, osl in enumerate(os_data.values()):
3202 valid = bool(valid and osl and osl[0][1])
3204 (variants, params, api_versions) = null_state
3206 node_variants, node_params, node_api = osl[0][3:6]
3207 if idx == 0: # first entry
3208 variants = set(node_variants)
3209 params = set(node_params)
3210 api_versions = set(node_api)
3211 else: # keep consistency
3212 variants.intersection_update(node_variants)
3213 params.intersection_update(node_params)
3214 api_versions.intersection_update(node_api)
3216 is_hid = os_name in cluster.hidden_os
3217 is_blk = os_name in cluster.blacklisted_os
3218 if ((self._HID not in self.op.output_fields and is_hid) or
3219 (self._BLK not in self.op.output_fields and is_blk) or
3220 (self._VLD not in self.op.output_fields and not valid)):
3223 for field in self.op.output_fields:
3226 elif field == self._VLD:
3228 elif field == "node_status":
3229 # this is just a copy of the dict
3231 for node_name, nos_list in os_data.items():
3232 val[node_name] = nos_list
3233 elif field == "variants":
3234 val = utils.NiceSort(list(variants))
3235 elif field == "parameters":
3237 elif field == "api_versions":
3238 val = list(api_versions)
3239 elif field == self._HID:
3241 elif field == self._BLK:
3244 raise errors.ParameterError(field)
3251 class LURemoveNode(LogicalUnit):
3252 """Logical unit for removing a node.
3255 HPATH = "node-remove"
3256 HTYPE = constants.HTYPE_NODE
3261 def BuildHooksEnv(self):
3264 This doesn't run on the target node in the pre phase as a failed
3265 node would then be impossible to remove.
3269 "OP_TARGET": self.op.node_name,
3270 "NODE_NAME": self.op.node_name,
3272 all_nodes = self.cfg.GetNodeList()
3274 all_nodes.remove(self.op.node_name)
3276 logging.warning("Node %s which is about to be removed not found"
3277 " in the all nodes list", self.op.node_name)
3278 return env, all_nodes, all_nodes
3280 def CheckPrereq(self):
3281 """Check prerequisites.
3284 - the node exists in the configuration
3285 - it does not have primary or secondary instances
3286 - it's not the master
3288 Any errors are signaled by raising errors.OpPrereqError.
3291 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3292 node = self.cfg.GetNodeInfo(self.op.node_name)
3293 assert node is not None
3295 instance_list = self.cfg.GetInstanceList()
3297 masternode = self.cfg.GetMasterNode()
3298 if node.name == masternode:
3299 raise errors.OpPrereqError("Node is the master node,"
3300 " you need to failover first.",
3303 for instance_name in instance_list:
3304 instance = self.cfg.GetInstanceInfo(instance_name)
3305 if node.name in instance.all_nodes:
3306 raise errors.OpPrereqError("Instance %s is still running on the node,"
3307 " please remove first." % instance_name,
3309 self.op.node_name = node.name
3312 def Exec(self, feedback_fn):
3313 """Removes the node from the cluster.
3317 logging.info("Stopping the node daemon and removing configs from node %s",
3320 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3322 # Promote nodes to master candidate as needed
3323 _AdjustCandidatePool(self, exceptions=[node.name])
3324 self.context.RemoveNode(node.name)
3326 # Run post hooks on the node before it's removed
3327 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3329 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3331 # pylint: disable-msg=W0702
3332 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3334 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3335 msg = result.fail_msg
3337 self.LogWarning("Errors encountered on the remote node while leaving"
3338 " the cluster: %s", msg)
3340 # Remove node from our /etc/hosts
3341 if self.cfg.GetClusterInfo().modify_etc_hosts:
3342 # FIXME: this should be done via an rpc call to node daemon
3343 utils.RemoveHostFromEtcHosts(node.name)
3344 _RedistributeAncillaryFiles(self)
3347 class LUQueryNodes(NoHooksLU):
3348 """Logical unit for querying nodes.
3351 # pylint: disable-msg=W0142
3354 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3355 ("use_locking", False, _TBool),
3359 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3360 "master_candidate", "offline", "drained"]
3362 _FIELDS_DYNAMIC = utils.FieldSet(
3364 "mtotal", "mnode", "mfree",
3366 "ctotal", "cnodes", "csockets",
3369 _FIELDS_STATIC = utils.FieldSet(*[
3370 "pinst_cnt", "sinst_cnt",
3371 "pinst_list", "sinst_list",
3372 "pip", "sip", "tags",
3374 "role"] + _SIMPLE_FIELDS
3377 def CheckArguments(self):
3378 _CheckOutputFields(static=self._FIELDS_STATIC,
3379 dynamic=self._FIELDS_DYNAMIC,
3380 selected=self.op.output_fields)
3382 def ExpandNames(self):
3383 self.needed_locks = {}
3384 self.share_locks[locking.LEVEL_NODE] = 1
3387 self.wanted = _GetWantedNodes(self, self.op.names)
3389 self.wanted = locking.ALL_SET
3391 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3392 self.do_locking = self.do_node_query and self.op.use_locking
3394 # if we don't request only static fields, we need to lock the nodes
3395 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3397 def Exec(self, feedback_fn):
3398 """Computes the list of nodes and their attributes.
3401 all_info = self.cfg.GetAllNodesInfo()
3403 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3404 elif self.wanted != locking.ALL_SET:
3405 nodenames = self.wanted
3406 missing = set(nodenames).difference(all_info.keys())
3408 raise errors.OpExecError(
3409 "Some nodes were removed before retrieving their data: %s" % missing)
3411 nodenames = all_info.keys()
3413 nodenames = utils.NiceSort(nodenames)
3414 nodelist = [all_info[name] for name in nodenames]
3416 # begin data gathering
3418 if self.do_node_query:
3420 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3421 self.cfg.GetHypervisorType())
3422 for name in nodenames:
3423 nodeinfo = node_data[name]
3424 if not nodeinfo.fail_msg and nodeinfo.payload:
3425 nodeinfo = nodeinfo.payload
3426 fn = utils.TryConvert
3428 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3429 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3430 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3431 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3432 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3433 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3434 "bootid": nodeinfo.get('bootid', None),
3435 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3436 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3439 live_data[name] = {}
3441 live_data = dict.fromkeys(nodenames, {})
3443 node_to_primary = dict([(name, set()) for name in nodenames])
3444 node_to_secondary = dict([(name, set()) for name in nodenames])
3446 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3447 "sinst_cnt", "sinst_list"))
3448 if inst_fields & frozenset(self.op.output_fields):
3449 inst_data = self.cfg.GetAllInstancesInfo()
3451 for inst in inst_data.values():
3452 if inst.primary_node in node_to_primary:
3453 node_to_primary[inst.primary_node].add(inst.name)
3454 for secnode in inst.secondary_nodes:
3455 if secnode in node_to_secondary:
3456 node_to_secondary[secnode].add(inst.name)
3458 master_node = self.cfg.GetMasterNode()
3460 # end data gathering
3463 for node in nodelist:
3465 for field in self.op.output_fields:
3466 if field in self._SIMPLE_FIELDS:
3467 val = getattr(node, field)
3468 elif field == "pinst_list":
3469 val = list(node_to_primary[node.name])
3470 elif field == "sinst_list":
3471 val = list(node_to_secondary[node.name])
3472 elif field == "pinst_cnt":
3473 val = len(node_to_primary[node.name])
3474 elif field == "sinst_cnt":
3475 val = len(node_to_secondary[node.name])
3476 elif field == "pip":
3477 val = node.primary_ip
3478 elif field == "sip":
3479 val = node.secondary_ip
3480 elif field == "tags":
3481 val = list(node.GetTags())
3482 elif field == "master":
3483 val = node.name == master_node
3484 elif self._FIELDS_DYNAMIC.Matches(field):
3485 val = live_data[node.name].get(field, None)
3486 elif field == "role":
3487 if node.name == master_node:
3489 elif node.master_candidate:
3498 raise errors.ParameterError(field)
3499 node_output.append(val)
3500 output.append(node_output)
3505 class LUQueryNodeVolumes(NoHooksLU):
3506 """Logical unit for getting volumes on node(s).
3510 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3511 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3514 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3515 _FIELDS_STATIC = utils.FieldSet("node")
3517 def CheckArguments(self):
3518 _CheckOutputFields(static=self._FIELDS_STATIC,
3519 dynamic=self._FIELDS_DYNAMIC,
3520 selected=self.op.output_fields)
3522 def ExpandNames(self):
3523 self.needed_locks = {}
3524 self.share_locks[locking.LEVEL_NODE] = 1
3525 if not self.op.nodes:
3526 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3528 self.needed_locks[locking.LEVEL_NODE] = \
3529 _GetWantedNodes(self, self.op.nodes)
3531 def Exec(self, feedback_fn):
3532 """Computes the list of nodes and their attributes.
3535 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3536 volumes = self.rpc.call_node_volumes(nodenames)
3538 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3539 in self.cfg.GetInstanceList()]
3541 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3544 for node in nodenames:
3545 nresult = volumes[node]
3548 msg = nresult.fail_msg
3550 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3553 node_vols = nresult.payload[:]
3554 node_vols.sort(key=lambda vol: vol['dev'])
3556 for vol in node_vols:
3558 for field in self.op.output_fields:
3561 elif field == "phys":
3565 elif field == "name":
3567 elif field == "size":
3568 val = int(float(vol['size']))
3569 elif field == "instance":
3571 if node not in lv_by_node[inst]:
3573 if vol['name'] in lv_by_node[inst][node]:
3579 raise errors.ParameterError(field)
3580 node_output.append(str(val))
3582 output.append(node_output)
3587 class LUQueryNodeStorage(NoHooksLU):
3588 """Logical unit for getting information on storage units on node(s).
3591 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3593 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3594 ("storage_type", _NoDefault, _CheckStorageType),
3595 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3596 ("name", None, _TMaybeString),
3600 def CheckArguments(self):
3601 _CheckOutputFields(static=self._FIELDS_STATIC,
3602 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3603 selected=self.op.output_fields)
3605 def ExpandNames(self):
3606 self.needed_locks = {}
3607 self.share_locks[locking.LEVEL_NODE] = 1
3610 self.needed_locks[locking.LEVEL_NODE] = \
3611 _GetWantedNodes(self, self.op.nodes)
3613 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3615 def Exec(self, feedback_fn):
3616 """Computes the list of nodes and their attributes.
3619 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3621 # Always get name to sort by
3622 if constants.SF_NAME in self.op.output_fields:
3623 fields = self.op.output_fields[:]
3625 fields = [constants.SF_NAME] + self.op.output_fields
3627 # Never ask for node or type as it's only known to the LU
3628 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3629 while extra in fields:
3630 fields.remove(extra)
3632 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3633 name_idx = field_idx[constants.SF_NAME]
3635 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3636 data = self.rpc.call_storage_list(self.nodes,
3637 self.op.storage_type, st_args,
3638 self.op.name, fields)
3642 for node in utils.NiceSort(self.nodes):
3643 nresult = data[node]
3647 msg = nresult.fail_msg
3649 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3652 rows = dict([(row[name_idx], row) for row in nresult.payload])
3654 for name in utils.NiceSort(rows.keys()):
3659 for field in self.op.output_fields:
3660 if field == constants.SF_NODE:
3662 elif field == constants.SF_TYPE:
3663 val = self.op.storage_type
3664 elif field in field_idx:
3665 val = row[field_idx[field]]
3667 raise errors.ParameterError(field)
3676 class LUModifyNodeStorage(NoHooksLU):
3677 """Logical unit for modifying a storage volume on a node.
3682 ("storage_type", _NoDefault, _CheckStorageType),
3683 ("name", _NoDefault, _TNonEmptyString),
3684 ("changes", _NoDefault, _TDict),
3688 def CheckArguments(self):
3689 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3691 storage_type = self.op.storage_type
3694 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3696 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3697 " modified" % storage_type,
3700 diff = set(self.op.changes.keys()) - modifiable
3702 raise errors.OpPrereqError("The following fields can not be modified for"
3703 " storage units of type '%s': %r" %
3704 (storage_type, list(diff)),
3707 def ExpandNames(self):
3708 self.needed_locks = {
3709 locking.LEVEL_NODE: self.op.node_name,
3712 def Exec(self, feedback_fn):
3713 """Computes the list of nodes and their attributes.
3716 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3717 result = self.rpc.call_storage_modify(self.op.node_name,
3718 self.op.storage_type, st_args,
3719 self.op.name, self.op.changes)
3720 result.Raise("Failed to modify storage unit '%s' on %s" %
3721 (self.op.name, self.op.node_name))
3724 class LUAddNode(LogicalUnit):
3725 """Logical unit for adding node to the cluster.
3729 HTYPE = constants.HTYPE_NODE
3732 ("primary_ip", None, _NoType),
3733 ("secondary_ip", None, _TMaybeString),
3734 ("readd", False, _TBool),
3737 def CheckArguments(self):
3738 # validate/normalize the node name
3739 self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3741 def BuildHooksEnv(self):
3744 This will run on all nodes before, and on all nodes + the new node after.
3748 "OP_TARGET": self.op.node_name,
3749 "NODE_NAME": self.op.node_name,
3750 "NODE_PIP": self.op.primary_ip,
3751 "NODE_SIP": self.op.secondary_ip,
3753 nodes_0 = self.cfg.GetNodeList()
3754 nodes_1 = nodes_0 + [self.op.node_name, ]
3755 return env, nodes_0, nodes_1
3757 def CheckPrereq(self):
3758 """Check prerequisites.
3761 - the new node is not already in the config
3763 - its parameters (single/dual homed) match the cluster
3765 Any errors are signaled by raising errors.OpPrereqError.
3768 node_name = self.op.node_name
3771 dns_data = netutils.GetHostInfo(node_name)
3773 node = dns_data.name
3774 primary_ip = self.op.primary_ip = dns_data.ip
3775 if self.op.secondary_ip is None:
3776 self.op.secondary_ip = primary_ip
3777 if not netutils.IsValidIP4(self.op.secondary_ip):
3778 raise errors.OpPrereqError("Invalid secondary IP given",
3780 secondary_ip = self.op.secondary_ip
3782 node_list = cfg.GetNodeList()
3783 if not self.op.readd and node in node_list:
3784 raise errors.OpPrereqError("Node %s is already in the configuration" %
3785 node, errors.ECODE_EXISTS)
3786 elif self.op.readd and node not in node_list:
3787 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3790 self.changed_primary_ip = False
3792 for existing_node_name in node_list:
3793 existing_node = cfg.GetNodeInfo(existing_node_name)
3795 if self.op.readd and node == existing_node_name:
3796 if existing_node.secondary_ip != secondary_ip:
3797 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3798 " address configuration as before",
3800 if existing_node.primary_ip != primary_ip:
3801 self.changed_primary_ip = True
3805 if (existing_node.primary_ip == primary_ip or
3806 existing_node.secondary_ip == primary_ip or
3807 existing_node.primary_ip == secondary_ip or
3808 existing_node.secondary_ip == secondary_ip):
3809 raise errors.OpPrereqError("New node ip address(es) conflict with"
3810 " existing node %s" % existing_node.name,
3811 errors.ECODE_NOTUNIQUE)
3813 # check that the type of the node (single versus dual homed) is the
3814 # same as for the master
3815 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3816 master_singlehomed = myself.secondary_ip == myself.primary_ip
3817 newbie_singlehomed = secondary_ip == primary_ip
3818 if master_singlehomed != newbie_singlehomed:
3819 if master_singlehomed:
3820 raise errors.OpPrereqError("The master has no private ip but the"
3821 " new node has one",
3824 raise errors.OpPrereqError("The master has a private ip but the"
3825 " new node doesn't have one",
3828 # checks reachability
3829 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3830 raise errors.OpPrereqError("Node not reachable by ping",
3831 errors.ECODE_ENVIRON)
3833 if not newbie_singlehomed:
3834 # check reachability from my secondary ip to newbie's secondary ip
3835 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3836 source=myself.secondary_ip):
3837 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3838 " based ping to noded port",
3839 errors.ECODE_ENVIRON)
3846 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3849 self.new_node = self.cfg.GetNodeInfo(node)
3850 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3852 self.new_node = objects.Node(name=node,
3853 primary_ip=primary_ip,
3854 secondary_ip=secondary_ip,
3855 master_candidate=self.master_candidate,
3856 offline=False, drained=False)
3858 def Exec(self, feedback_fn):
3859 """Adds the new node to the cluster.
3862 new_node = self.new_node
3863 node = new_node.name
3865 # for re-adds, reset the offline/drained/master-candidate flags;
3866 # we need to reset here, otherwise offline would prevent RPC calls
3867 # later in the procedure; this also means that if the re-add
3868 # fails, we are left with a non-offlined, broken node
3870 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3871 self.LogInfo("Readding a node, the offline/drained flags were reset")
3872 # if we demote the node, we do cleanup later in the procedure
3873 new_node.master_candidate = self.master_candidate
3874 if self.changed_primary_ip:
3875 new_node.primary_ip = self.op.primary_ip
3877 # notify the user about any possible mc promotion
3878 if new_node.master_candidate:
3879 self.LogInfo("Node will be a master candidate")
3881 # check connectivity
3882 result = self.rpc.call_version([node])[node]
3883 result.Raise("Can't get version information from node %s" % node)
3884 if constants.PROTOCOL_VERSION == result.payload:
3885 logging.info("Communication to node %s fine, sw version %s match",
3886 node, result.payload)
3888 raise errors.OpExecError("Version mismatch master version %s,"
3889 " node version %s" %
3890 (constants.PROTOCOL_VERSION, result.payload))
3893 if self.cfg.GetClusterInfo().modify_ssh_setup:
3894 logging.info("Copy ssh key to node %s", node)
3895 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3897 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3898 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3902 keyarray.append(utils.ReadFile(i))
3904 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3905 keyarray[2], keyarray[3], keyarray[4],
3907 result.Raise("Cannot transfer ssh keys to the new node")
3909 # Add node to our /etc/hosts, and add key to known_hosts
3910 if self.cfg.GetClusterInfo().modify_etc_hosts:
3911 # FIXME: this should be done via an rpc call to node daemon
3912 utils.AddHostToEtcHosts(new_node.name)
3914 if new_node.secondary_ip != new_node.primary_ip:
3915 result = self.rpc.call_node_has_ip_address(new_node.name,
3916 new_node.secondary_ip)
3917 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3918 prereq=True, ecode=errors.ECODE_ENVIRON)
3919 if not result.payload:
3920 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3921 " you gave (%s). Please fix and re-run this"
3922 " command." % new_node.secondary_ip)
3924 node_verify_list = [self.cfg.GetMasterNode()]
3925 node_verify_param = {
3926 constants.NV_NODELIST: [node],
3927 # TODO: do a node-net-test as well?
3930 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3931 self.cfg.GetClusterName())
3932 for verifier in node_verify_list:
3933 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3934 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3936 for failed in nl_payload:
3937 feedback_fn("ssh/hostname verification failed"
3938 " (checking from %s): %s" %
3939 (verifier, nl_payload[failed]))
3940 raise errors.OpExecError("ssh/hostname verification failed.")
3943 _RedistributeAncillaryFiles(self)
3944 self.context.ReaddNode(new_node)
3945 # make sure we redistribute the config
3946 self.cfg.Update(new_node, feedback_fn)
3947 # and make sure the new node will not have old files around
3948 if not new_node.master_candidate:
3949 result = self.rpc.call_node_demote_from_mc(new_node.name)
3950 msg = result.fail_msg
3952 self.LogWarning("Node failed to demote itself from master"
3953 " candidate status: %s" % msg)
3955 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3956 self.context.AddNode(new_node, self.proc.GetECId())
3959 class LUSetNodeParams(LogicalUnit):
3960 """Modifies the parameters of a node.
3963 HPATH = "node-modify"
3964 HTYPE = constants.HTYPE_NODE
3967 ("master_candidate", None, _TMaybeBool),
3968 ("offline", None, _TMaybeBool),
3969 ("drained", None, _TMaybeBool),
3970 ("auto_promote", False, _TBool),
3975 def CheckArguments(self):
3976 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3977 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3978 if all_mods.count(None) == 3:
3979 raise errors.OpPrereqError("Please pass at least one modification",
3981 if all_mods.count(True) > 1:
3982 raise errors.OpPrereqError("Can't set the node into more than one"
3983 " state at the same time",
3986 # Boolean value that tells us whether we're offlining or draining the node
3987 self.offline_or_drain = (self.op.offline == True or
3988 self.op.drained == True)
3989 self.deoffline_or_drain = (self.op.offline == False or
3990 self.op.drained == False)
3991 self.might_demote = (self.op.master_candidate == False or
3992 self.offline_or_drain)
3994 self.lock_all = self.op.auto_promote and self.might_demote
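# Illustrative use (assuming the usual gnt-node wrapper around this LU),
# e.g. something like:
#   gnt-node modify --offline=yes --auto-promote node3.example.com
# Offlining a master candidate with auto_promote is what makes lock_all true
# here, since another node may have to be promoted in its place.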
3997 def ExpandNames(self):
3999 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4001 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4003 def BuildHooksEnv(self):
4006 This runs on the master node.
4010 "OP_TARGET": self.op.node_name,
4011 "MASTER_CANDIDATE": str(self.op.master_candidate),
4012 "OFFLINE": str(self.op.offline),
4013 "DRAINED": str(self.op.drained),
4015 nl = [self.cfg.GetMasterNode(),
4019 def CheckPrereq(self):
4020 """Check prerequisites.
4022 This checks that the requested node flag changes are valid.
4025 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4027 if (self.op.master_candidate is not None or
4028 self.op.drained is not None or
4029 self.op.offline is not None):
4030 # we can't change the master's node flags
4031 if self.op.node_name == self.cfg.GetMasterNode():
4032 raise errors.OpPrereqError("The master role can be changed"
4033 " only via master-failover",
4037 if node.master_candidate and self.might_demote and not self.lock_all:
4038 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4039 # check if after removing the current node, we're missing master
4041 (mc_remaining, mc_should, _) = \
4042 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4043 if mc_remaining < mc_should:
4044 raise errors.OpPrereqError("Not enough master candidates, please"
4045 " pass auto_promote to allow promotion",
4048 if (self.op.master_candidate == True and
4049 ((node.offline and not self.op.offline == False) or
4050 (node.drained and not self.op.drained == False))):
4051 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4052 " to master_candidate" % node.name,
4055 # If we're being deofflined/drained, we'll MC ourself if needed
4056 if (self.deoffline_or_drain and not self.offline_or_drain and not
4057 self.op.master_candidate == True and not node.master_candidate):
4058 self.op.master_candidate = _DecideSelfPromotion(self)
4059 if self.op.master_candidate:
4060 self.LogInfo("Autopromoting node to master candidate")
4064 def Exec(self, feedback_fn):
4073 if self.op.offline is not None:
4074 node.offline = self.op.offline
4075 result.append(("offline", str(self.op.offline)))
4076 if self.op.offline == True:
4077 if node.master_candidate:
4078 node.master_candidate = False
4080 result.append(("master_candidate", "auto-demotion due to offline"))
4082 node.drained = False
4083 result.append(("drained", "clear drained status due to offline"))
4085 if self.op.master_candidate is not None:
4086 node.master_candidate = self.op.master_candidate
4088 result.append(("master_candidate", str(self.op.master_candidate)))
4089 if self.op.master_candidate == False:
4090 rrc = self.rpc.call_node_demote_from_mc(node.name)
4093 self.LogWarning("Node failed to demote itself: %s" % msg)
4095 if self.op.drained is not None:
4096 node.drained = self.op.drained
4097 result.append(("drained", str(self.op.drained)))
4098 if self.op.drained == True:
4099 if node.master_candidate:
4100 node.master_candidate = False
4102 result.append(("master_candidate", "auto-demotion due to drain"))
4103 rrc = self.rpc.call_node_demote_from_mc(node.name)
4106 self.LogWarning("Node failed to demote itself: %s" % msg)
4108 node.offline = False
4109 result.append(("offline", "clear offline status due to drain"))
4111 # if we locked all nodes, we adjust the candidate pool before updating this node
4113 _AdjustCandidatePool(self, [node.name])
4115 # this will trigger configuration file update, if needed
4116 self.cfg.Update(node, feedback_fn)
4118 # this will trigger job queue propagation or cleanup
4120 self.context.ReaddNode(node)
4125 class LUPowercycleNode(NoHooksLU):
4126 """Powercycles a node.
4135 def CheckArguments(self):
4136 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4137 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4138 raise errors.OpPrereqError("The node is the master and the force"
4139 " parameter was not set",
4142 def ExpandNames(self):
4143 """Locking for PowercycleNode.
4145 This is a last-resort option and shouldn't block on other
4146 jobs. Therefore, we grab no locks.
4149 self.needed_locks = {}
4151 def Exec(self, feedback_fn):
4155 result = self.rpc.call_node_powercycle(self.op.node_name,
4156 self.cfg.GetHypervisorType())
4157 result.Raise("Failed to schedule the reboot")
4158 return result.payload
4161 class LUQueryClusterInfo(NoHooksLU):
4162 """Query cluster configuration.
4167 def ExpandNames(self):
4168 self.needed_locks = {}
4170 def Exec(self, feedback_fn):
4171 """Return cluster config.
4174 cluster = self.cfg.GetClusterInfo()
4177 # Filter just for enabled hypervisors
4178 for os_name, hv_dict in cluster.os_hvp.items():
4179 os_hvp[os_name] = {}
4180 for hv_name, hv_params in hv_dict.items():
4181 if hv_name in cluster.enabled_hypervisors:
4182 os_hvp[os_name][hv_name] = hv_params
4185 "software_version": constants.RELEASE_VERSION,
4186 "protocol_version": constants.PROTOCOL_VERSION,
4187 "config_version": constants.CONFIG_VERSION,
4188 "os_api_version": max(constants.OS_API_VERSIONS),
4189 "export_version": constants.EXPORT_VERSION,
4190 "architecture": (platform.architecture()[0], platform.machine()),
4191 "name": cluster.cluster_name,
4192 "master": cluster.master_node,
4193 "default_hypervisor": cluster.enabled_hypervisors[0],
4194 "enabled_hypervisors": cluster.enabled_hypervisors,
4195 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4196 for hypervisor_name in cluster.enabled_hypervisors]),
4198 "beparams": cluster.beparams,
4199 "osparams": cluster.osparams,
4200 "nicparams": cluster.nicparams,
4201 "candidate_pool_size": cluster.candidate_pool_size,
4202 "master_netdev": cluster.master_netdev,
4203 "volume_group_name": cluster.volume_group_name,
4204 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4205 "file_storage_dir": cluster.file_storage_dir,
4206 "maintain_node_health": cluster.maintain_node_health,
4207 "ctime": cluster.ctime,
4208 "mtime": cluster.mtime,
4209 "uuid": cluster.uuid,
4210 "tags": list(cluster.GetTags()),
4211 "uid_pool": cluster.uid_pool,
4212 "default_iallocator": cluster.default_iallocator,
4213 "reserved_lvs": cluster.reserved_lvs,
4219 class LUQueryConfigValues(NoHooksLU):
4220 """Return configuration values.
4223 _OP_PARAMS = [_POutputFields]
4225 _FIELDS_DYNAMIC = utils.FieldSet()
4226 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4227 "watcher_pause", "volume_group_name")
4229 def CheckArguments(self):
4230 _CheckOutputFields(static=self._FIELDS_STATIC,
4231 dynamic=self._FIELDS_DYNAMIC,
4232 selected=self.op.output_fields)
4234 def ExpandNames(self):
4235 self.needed_locks = {}
4237 def Exec(self, feedback_fn):
4238 """Dump a representation of the cluster config to the standard output.
4242 for field in self.op.output_fields:
4243 if field == "cluster_name":
4244 entry = self.cfg.GetClusterName()
4245 elif field == "master_node":
4246 entry = self.cfg.GetMasterNode()
4247 elif field == "drain_flag":
4248 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4249 elif field == "watcher_pause":
4250 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4251 elif field == "volume_group_name":
4252 entry = self.cfg.GetVGName()
4254 raise errors.ParameterError(field)
4255 values.append(entry)
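# Example (illustrative values only): output_fields=["cluster_name",
# "drain_flag"] would yield something like ["cluster.example.com", False],
# one entry per requested field, in the order they were asked for.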
4259 class LUActivateInstanceDisks(NoHooksLU):
4260 """Bring up an instance's disks.
4265 ("ignore_size", False, _TBool),
4269 def ExpandNames(self):
4270 self._ExpandAndLockInstance()
4271 self.needed_locks[locking.LEVEL_NODE] = []
4272 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4274 def DeclareLocks(self, level):
4275 if level == locking.LEVEL_NODE:
4276 self._LockInstancesNodes()
4278 def CheckPrereq(self):
4279 """Check prerequisites.
4281 This checks that the instance is in the cluster.
4284 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4285 assert self.instance is not None, \
4286 "Cannot retrieve locked instance %s" % self.op.instance_name
4287 _CheckNodeOnline(self, self.instance.primary_node)
4289 def Exec(self, feedback_fn):
4290 """Activate the disks.
4293 disks_ok, disks_info = \
4294 _AssembleInstanceDisks(self, self.instance,
4295 ignore_size=self.op.ignore_size)
4297 raise errors.OpExecError("Cannot activate block devices")
4302 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4304 """Prepare the block devices for an instance.
4306 This sets up the block devices on all nodes.
4308 @type lu: L{LogicalUnit}
4309 @param lu: the logical unit on whose behalf we execute
4310 @type instance: L{objects.Instance}
4311 @param instance: the instance for whose disks we assemble
4312 @type disks: list of L{objects.Disk} or None
4313 @param disks: which disks to assemble (or all, if None)
4314 @type ignore_secondaries: boolean
4315 @param ignore_secondaries: if true, errors on secondary nodes
4316 won't result in an error return from the function
4317 @type ignore_size: boolean
4318 @param ignore_size: if true, the current known size of the disk
4319 will not be used during the disk activation, useful for cases
4320 when the size is wrong
4321 @return: False if the operation failed, otherwise a list of
4322 (host, instance_visible_name, node_visible_name)
4323 with the mapping from node devices to instance devices
4328 iname = instance.name
4329 disks = _ExpandCheckDisks(instance, disks)
4331 # With the two-pass mechanism we try to reduce the window of
4332 # opportunity for the race condition of switching DRBD to primary
4333 # before handshaking has occurred, but we do not eliminate it
4335 # The proper fix would be to wait (with some limits) until the
4336 # connection has been made and drbd transitions from WFConnection
4337 # into any other network-connected state (Connected, SyncTarget,
4340 # 1st pass, assemble on all nodes in secondary mode
4341 for inst_disk in disks:
4342 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4344 node_disk = node_disk.Copy()
4345 node_disk.UnsetSize()
4346 lu.cfg.SetDiskID(node_disk, node)
4347 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4348 msg = result.fail_msg
4350 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4351 " (is_primary=False, pass=1): %s",
4352 inst_disk.iv_name, node, msg)
4353 if not ignore_secondaries:
4356 # FIXME: race condition on drbd migration to primary
4358 # 2nd pass, do only the primary node
4359 for inst_disk in disks:
4362 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4363 if node != instance.primary_node:
4366 node_disk = node_disk.Copy()
4367 node_disk.UnsetSize()
4368 lu.cfg.SetDiskID(node_disk, node)
4369 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4370 msg = result.fail_msg
4372 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4373 " (is_primary=True, pass=2): %s",
4374 inst_disk.iv_name, node, msg)
4377 dev_path = result.payload
4379 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4381 # leave the disks configured for the primary node
4382 # this is a workaround that would be fixed better by
4383 # improving the logical/physical id handling
4385 lu.cfg.SetDiskID(disk, instance.primary_node)
4387 return disks_ok, device_info
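# Illustrative return value (device path is made up for the example):
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# i.e. one (node, iv_name, device path) triple per disk assembled on the
# primary node, plus the overall success flag.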
4390 def _StartInstanceDisks(lu, instance, force):
4391 """Start the disks of an instance.
4394 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4395 ignore_secondaries=force)
4397 _ShutdownInstanceDisks(lu, instance)
4398 if force is not None and not force:
4399 lu.proc.LogWarning("", hint="If the message above refers to a"
4401 " you can retry the operation using '--force'.")
4402 raise errors.OpExecError("Disk consistency error")
4405 class LUDeactivateInstanceDisks(NoHooksLU):
4406 """Shutdown an instance's disks.
4414 def ExpandNames(self):
4415 self._ExpandAndLockInstance()
4416 self.needed_locks[locking.LEVEL_NODE] = []
4417 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4419 def DeclareLocks(self, level):
4420 if level == locking.LEVEL_NODE:
4421 self._LockInstancesNodes()
4423 def CheckPrereq(self):
4424 """Check prerequisites.
4426 This checks that the instance is in the cluster.
4429 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4430 assert self.instance is not None, \
4431 "Cannot retrieve locked instance %s" % self.op.instance_name
4433 def Exec(self, feedback_fn):
4434 """Deactivate the disks
4437 instance = self.instance
4438 _SafeShutdownInstanceDisks(self, instance)
4441 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4442 """Shutdown block devices of an instance.
4444 This function checks if an instance is running, before calling
4445 _ShutdownInstanceDisks.
4448 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4449 _ShutdownInstanceDisks(lu, instance, disks=disks)
4452 def _ExpandCheckDisks(instance, disks):
4453 """Return the instance disks selected by the disks list
4455 @type disks: list of L{objects.Disk} or None
4456 @param disks: selected disks
4457 @rtype: list of L{objects.Disk}
4458 @return: selected instance disks to act on
4462 return instance.disks
4464 if not set(disks).issubset(instance.disks):
4465 raise errors.ProgrammerError("Can only act on disks belonging to the"
4470 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4471 """Shutdown block devices of an instance.
4473 This does the shutdown on all nodes of the instance.
4475 If ignore_primary is false, errors on the primary node are
4480 disks = _ExpandCheckDisks(instance, disks)
4483 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4484 lu.cfg.SetDiskID(top_disk, node)
4485 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4486 msg = result.fail_msg
4488 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4489 disk.iv_name, node, msg)
4490 if not ignore_primary or node != instance.primary_node:
4495 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4496 """Checks if a node has enough free memory.
4498 This function checks if a given node has the needed amount of free
4499 memory. In case the node has less memory or we cannot get the
4500 information from the node, this function raises an OpPrereqError
4503 @type lu: C{LogicalUnit}
4504 @param lu: a logical unit from which we get configuration data
4506 @param node: the node to check
4507 @type reason: C{str}
4508 @param reason: string to use in the error message
4509 @type requested: C{int}
4510 @param requested: the amount of memory in MiB to check for
4511 @type hypervisor_name: C{str}
4512 @param hypervisor_name: the hypervisor to ask for memory stats
4513 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4514 we cannot check the node
4517 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4518 nodeinfo[node].Raise("Can't get data from node %s" % node,
4519 prereq=True, ecode=errors.ECODE_ENVIRON)
4520 free_mem = nodeinfo[node].payload.get('memory_free', None)
4521 if not isinstance(free_mem, int):
4522 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4523 " was '%s'" % (node, free_mem),
4524 errors.ECODE_ENVIRON)
4525 if requested > free_mem:
4526 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4527 " needed %s MiB, available %s MiB" %
4528 (node, reason, requested, free_mem),
4532 def _CheckNodesFreeDisk(lu, nodenames, requested):
4533 """Checks if nodes have enough free disk space in the default VG.
4535 This function checks if all given nodes have the needed amount of
4536 free disk. In case any node has less disk or we cannot get the
4537 information from the node, this function raises an OpPrereqError
4540 @type lu: C{LogicalUnit}
4541 @param lu: a logical unit from which we get configuration data
4542 @type nodenames: C{list}
4543 @param nodenames: the list of node names to check
4544 @type requested: C{int}
4545 @param requested: the amount of disk in MiB to check for
4546 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4547 we cannot check the node
4550 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4551 lu.cfg.GetHypervisorType())
4552 for node in nodenames:
4553 info = nodeinfo[node]
4554 info.Raise("Cannot get current information from node %s" % node,
4555 prereq=True, ecode=errors.ECODE_ENVIRON)
4556 vg_free = info.payload.get("vg_free", None)
4557 if not isinstance(vg_free, int):
4558 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4559 " result was '%s'" % (node, vg_free),
4560 errors.ECODE_ENVIRON)
4561 if requested > vg_free:
4562 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4563 " required %d MiB, available %d MiB" %
4564 (node, requested, vg_free),
4568 class LUStartupInstance(LogicalUnit):
4569 """Starts an instance.
4572 HPATH = "instance-start"
4573 HTYPE = constants.HTYPE_INSTANCE
4577 ("hvparams", _EmptyDict, _TDict),
4578 ("beparams", _EmptyDict, _TDict),
4582 def CheckArguments(self):
4584 if self.op.beparams:
4585 # fill the beparams dict
4586 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4588 def ExpandNames(self):
4589 self._ExpandAndLockInstance()
4591 def BuildHooksEnv(self):
4594 This runs on master, primary and secondary nodes of the instance.
4598 "FORCE": self.op.force,
4600 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4601 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4604 def CheckPrereq(self):
4605 """Check prerequisites.
4607 This checks that the instance is in the cluster.
4610 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4611 assert self.instance is not None, \
4612 "Cannot retrieve locked instance %s" % self.op.instance_name
4615 if self.op.hvparams:
4616 # check hypervisor parameter syntax (locally)
4617 cluster = self.cfg.GetClusterInfo()
4618 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4619 filled_hvp = cluster.FillHV(instance)
4620 filled_hvp.update(self.op.hvparams)
4621 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4622 hv_type.CheckParameterSyntax(filled_hvp)
4623 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
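# Parameter layering in the block above: cluster-level hvparams are filled
# with the instance's own overrides by FillHV, then the per-start overrides
# from the opcode are applied on top, before syntax and node-level checks.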
4625 _CheckNodeOnline(self, instance.primary_node)
4627 bep = self.cfg.GetClusterInfo().FillBE(instance)
4628 # check bridges existence
4629 _CheckInstanceBridgesExist(self, instance)
4631 remote_info = self.rpc.call_instance_info(instance.primary_node,
4633 instance.hypervisor)
4634 remote_info.Raise("Error checking node %s" % instance.primary_node,
4635 prereq=True, ecode=errors.ECODE_ENVIRON)
4636 if not remote_info.payload: # not running already
4637 _CheckNodeFreeMemory(self, instance.primary_node,
4638 "starting instance %s" % instance.name,
4639 bep[constants.BE_MEMORY], instance.hypervisor)
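# Note: the free-memory check above is only done when instance_info reports
# the instance as not already running on the primary node.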
4641 def Exec(self, feedback_fn):
4642 """Start the instance.
4645 instance = self.instance
4646 force = self.op.force
4648 self.cfg.MarkInstanceUp(instance.name)
4650 node_current = instance.primary_node
4652 _StartInstanceDisks(self, instance, force)
4654 result = self.rpc.call_instance_start(node_current, instance,
4655 self.op.hvparams, self.op.beparams)
4656 msg = result.fail_msg
4658 _ShutdownInstanceDisks(self, instance)
4659 raise errors.OpExecError("Could not start instance: %s" % msg)
4662 class LURebootInstance(LogicalUnit):
4663 """Reboot an instance.
4666 HPATH = "instance-reboot"
4667 HTYPE = constants.HTYPE_INSTANCE
4670 ("ignore_secondaries", False, _TBool),
4671 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4676 def ExpandNames(self):
4677 self._ExpandAndLockInstance()
4679 def BuildHooksEnv(self):
4682 This runs on master, primary and secondary nodes of the instance.
4686 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4687 "REBOOT_TYPE": self.op.reboot_type,
4688 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4690 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4691 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4694 def CheckPrereq(self):
4695 """Check prerequisites.
4697 This checks that the instance is in the cluster.
4700 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4701 assert self.instance is not None, \
4702 "Cannot retrieve locked instance %s" % self.op.instance_name
4704 _CheckNodeOnline(self, instance.primary_node)
4706 # check bridges existence
4707 _CheckInstanceBridgesExist(self, instance)
4709 def Exec(self, feedback_fn):
4710 """Reboot the instance.
4713 instance = self.instance
4714 ignore_secondaries = self.op.ignore_secondaries
4715 reboot_type = self.op.reboot_type
4717 node_current = instance.primary_node
4719 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4720 constants.INSTANCE_REBOOT_HARD]:
4721 for disk in instance.disks:
4722 self.cfg.SetDiskID(disk, node_current)
4723 result = self.rpc.call_instance_reboot(node_current, instance,
4725 self.op.shutdown_timeout)
4726 result.Raise("Could not reboot instance")
4728 result = self.rpc.call_instance_shutdown(node_current, instance,
4729 self.op.shutdown_timeout)
4730 result.Raise("Could not shutdown instance for full reboot")
4731 _ShutdownInstanceDisks(self, instance)
4732 _StartInstanceDisks(self, instance, ignore_secondaries)
4733 result = self.rpc.call_instance_start(node_current, instance, None, None)
4734 msg = result.fail_msg
4736 _ShutdownInstanceDisks(self, instance)
4737 raise errors.OpExecError("Could not start instance for"
4738 " full reboot: %s" % msg)
4740 self.cfg.MarkInstanceUp(instance.name)
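# Two paths above: soft/hard reboots are delegated to the hypervisor via
# instance_reboot, while a full reboot is emulated as shutdown, disk
# deactivation/reactivation and a fresh instance_start.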
4743 class LUShutdownInstance(LogicalUnit):
4744 """Shutdown an instance.
4747 HPATH = "instance-stop"
4748 HTYPE = constants.HTYPE_INSTANCE
4751 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4755 def ExpandNames(self):
4756 self._ExpandAndLockInstance()
4758 def BuildHooksEnv(self):
4761 This runs on master, primary and secondary nodes of the instance.
4764 env = _BuildInstanceHookEnvByObject(self, self.instance)
4765 env["TIMEOUT"] = self.op.timeout
4766 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4769 def CheckPrereq(self):
4770 """Check prerequisites.
4772 This checks that the instance is in the cluster.
4775 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4776 assert self.instance is not None, \
4777 "Cannot retrieve locked instance %s" % self.op.instance_name
4778 _CheckNodeOnline(self, self.instance.primary_node)
4780 def Exec(self, feedback_fn):
4781 """Shutdown the instance.
4784 instance = self.instance
4785 node_current = instance.primary_node
4786 timeout = self.op.timeout
4787 self.cfg.MarkInstanceDown(instance.name)
4788 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4789 msg = result.fail_msg
4791 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4793 _ShutdownInstanceDisks(self, instance)
4796 class LUReinstallInstance(LogicalUnit):
4797 """Reinstall an instance.
4800 HPATH = "instance-reinstall"
4801 HTYPE = constants.HTYPE_INSTANCE
4804 ("os_type", None, _TMaybeString),
4805 ("force_variant", False, _TBool),
4809 def ExpandNames(self):
4810 self._ExpandAndLockInstance()
4812 def BuildHooksEnv(self):
4815 This runs on master, primary and secondary nodes of the instance.
4818 env = _BuildInstanceHookEnvByObject(self, self.instance)
4819 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4822 def CheckPrereq(self):
4823 """Check prerequisites.
4825 This checks that the instance is in the cluster and is not running.
4828 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4829 assert instance is not None, \
4830 "Cannot retrieve locked instance %s" % self.op.instance_name
4831 _CheckNodeOnline(self, instance.primary_node)
4833 if instance.disk_template == constants.DT_DISKLESS:
4834 raise errors.OpPrereqError("Instance '%s' has no disks" %
4835 self.op.instance_name,
4837 _CheckInstanceDown(self, instance, "cannot reinstall")
4839 if self.op.os_type is not None:
4841 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4842 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4844 self.instance = instance
4846 def Exec(self, feedback_fn):
4847 """Reinstall the instance.
4850 inst = self.instance
4852 if self.op.os_type is not None:
4853 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4854 inst.os = self.op.os_type
4855 self.cfg.Update(inst, feedback_fn)
4857 _StartInstanceDisks(self, inst, None)
4859 feedback_fn("Running the instance OS create scripts...")
4860 # FIXME: pass debug option from opcode to backend
4861 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4862 self.op.debug_level)
4863 result.Raise("Could not install OS for instance %s on node %s" %
4864 (inst.name, inst.primary_node))
4866 _ShutdownInstanceDisks(self, inst)
4869 class LURecreateInstanceDisks(LogicalUnit):
4870 """Recreate an instance's missing disks.
4873 HPATH = "instance-recreate-disks"
4874 HTYPE = constants.HTYPE_INSTANCE
4877 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4881 def ExpandNames(self):
4882 self._ExpandAndLockInstance()
4884 def BuildHooksEnv(self):
4887 This runs on master, primary and secondary nodes of the instance.
4890 env = _BuildInstanceHookEnvByObject(self, self.instance)
4891 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4894 def CheckPrereq(self):
4895 """Check prerequisites.
4897 This checks that the instance is in the cluster and is not running.
4900 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4901 assert instance is not None, \
4902 "Cannot retrieve locked instance %s" % self.op.instance_name
4903 _CheckNodeOnline(self, instance.primary_node)
4905 if instance.disk_template == constants.DT_DISKLESS:
4906 raise errors.OpPrereqError("Instance '%s' has no disks" %
4907 self.op.instance_name, errors.ECODE_INVAL)
4908 _CheckInstanceDown(self, instance, "cannot recreate disks")
4910 if not self.op.disks:
4911 self.op.disks = range(len(instance.disks))
4913 for idx in self.op.disks:
4914 if idx >= len(instance.disks):
4915 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4918 self.instance = instance
4920 def Exec(self, feedback_fn):
4921 """Recreate the disks.
4925 for idx, _ in enumerate(self.instance.disks):
4926 if idx not in self.op.disks: # disk idx has not been passed in
4930 _CreateDisks(self, self.instance, to_skip=to_skip)
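# Example (illustrative): with op.disks=[1] only the second disk is
# recreated; the default empty list is expanded to all disk indices in
# CheckPrereq, so every disk is recreated.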
4933 class LURenameInstance(LogicalUnit):
4934 """Rename an instance.
4937 HPATH = "instance-rename"
4938 HTYPE = constants.HTYPE_INSTANCE
4941 ("new_name", _NoDefault, _TNonEmptyString),
4942 ("ip_check", False, _TBool),
4943 ("name_check", True, _TBool),
4946 def CheckArguments(self):
4950 if self.op.ip_check and not self.op.name_check:
4951 # TODO: make the ip check more flexible and not depend on the name check
4952 raise errors.OpPrereqError("Cannot do ip check without a name check",
4955 def BuildHooksEnv(self):
4958 This runs on master, primary and secondary nodes of the instance.
4961 env = _BuildInstanceHookEnvByObject(self, self.instance)
4962 env["INSTANCE_NEW_NAME"] = self.op.new_name
4963 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4966 def CheckPrereq(self):
4967 """Check prerequisites.
4969 This checks that the instance is in the cluster and is not running.
4972 self.op.instance_name = _ExpandInstanceName(self.cfg,
4973 self.op.instance_name)
4974 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4975 assert instance is not None
4976 _CheckNodeOnline(self, instance.primary_node)
4977 _CheckInstanceDown(self, instance, "cannot rename")
4978 self.instance = instance
4980 new_name = self.op.new_name
4981 if self.op.name_check:
4982 hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
4983 new_name = self.op.new_name = hostinfo.name
4984 if (self.op.ip_check and
4985 netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
4986 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4987 (hostinfo.ip, new_name),
4988 errors.ECODE_NOTUNIQUE)
4990 instance_list = self.cfg.GetInstanceList()
4991 if new_name in instance_list:
4992 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4993 new_name, errors.ECODE_EXISTS)
4996 def Exec(self, feedback_fn):
4997 """Reinstall the instance.
5000 inst = self.instance
5001 old_name = inst.name
5003 if inst.disk_template == constants.DT_FILE:
5004 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5006 self.cfg.RenameInstance(inst.name, self.op.new_name)
5007 # Change the instance lock. This is definitely safe while we hold the BGL
5008 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5009 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5011 # re-read the instance from the configuration after rename
5012 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5014 if inst.disk_template == constants.DT_FILE:
5015 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5016 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5017 old_file_storage_dir,
5018 new_file_storage_dir)
5019 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5020 " (but the instance has been renamed in Ganeti)" %
5021 (inst.primary_node, old_file_storage_dir,
5022 new_file_storage_dir))
5024 _StartInstanceDisks(self, inst, None)
5026 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5027 old_name, self.op.debug_level)
5028 msg = result.fail_msg
5030 msg = ("Could not run OS rename script for instance %s on node %s"
5031 " (but the instance has been renamed in Ganeti): %s" %
5032 (inst.name, inst.primary_node, msg))
5033 self.proc.LogWarning(msg)
5035 _ShutdownInstanceDisks(self, inst)
5040 class LURemoveInstance(LogicalUnit):
5041 """Remove an instance.
5044 HPATH = "instance-remove"
5045 HTYPE = constants.HTYPE_INSTANCE
5048 ("ignore_failures", False, _TBool),
5053 def ExpandNames(self):
5054 self._ExpandAndLockInstance()
5055 self.needed_locks[locking.LEVEL_NODE] = []
5056 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5058 def DeclareLocks(self, level):
5059 if level == locking.LEVEL_NODE:
5060 self._LockInstancesNodes()
5062 def BuildHooksEnv(self):
5065 This runs on master, primary and secondary nodes of the instance.
5068 env = _BuildInstanceHookEnvByObject(self, self.instance)
5069 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5070 nl = [self.cfg.GetMasterNode()]
5071 nl_post = list(self.instance.all_nodes) + nl
5072 return env, nl, nl_post
5074 def CheckPrereq(self):
5075 """Check prerequisites.
5077 This checks that the instance is in the cluster.
5080 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5081 assert self.instance is not None, \
5082 "Cannot retrieve locked instance %s" % self.op.instance_name
5084 def Exec(self, feedback_fn):
5085 """Remove the instance.
5088 instance = self.instance
5089 logging.info("Shutting down instance %s on node %s",
5090 instance.name, instance.primary_node)
5092 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5093 self.op.shutdown_timeout)
5094 msg = result.fail_msg
5096 if self.op.ignore_failures:
5097 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5099 raise errors.OpExecError("Could not shutdown instance %s on"
5101 (instance.name, instance.primary_node, msg))
5103 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5106 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5107 """Utility function to remove an instance.
5110 logging.info("Removing block devices for instance %s", instance.name)
5112 if not _RemoveDisks(lu, instance):
5113 if not ignore_failures:
5114 raise errors.OpExecError("Can't remove instance's disks")
5115 feedback_fn("Warning: can't remove instance's disks")
5117 logging.info("Removing instance %s out of cluster config", instance.name)
5119 lu.cfg.RemoveInstance(instance.name)
5121 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5122 "Instance lock removal conflict"
5124 # Remove lock for the instance
5125 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5128 class LUQueryInstances(NoHooksLU):
5129 """Logical unit for querying instances.
5132 # pylint: disable-msg=W0142
5134 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5135 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5136 ("use_locking", False, _TBool),
5139 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5140 "serial_no", "ctime", "mtime", "uuid"]
5141 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5143 "disk_template", "ip", "mac", "bridge",
5144 "nic_mode", "nic_link",
5145 "sda_size", "sdb_size", "vcpus", "tags",
5146 "network_port", "beparams",
5147 r"(disk)\.(size)/([0-9]+)",
5148 r"(disk)\.(sizes)", "disk_usage",
5149 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5150 r"(nic)\.(bridge)/([0-9]+)",
5151 r"(nic)\.(macs|ips|modes|links|bridges)",
5152 r"(disk|nic)\.(count)",
5154 ] + _SIMPLE_FIELDS +
5156 for name in constants.HVS_PARAMETERS
5157 if name not in constants.HVC_GLOBALS] +
5159 for name in constants.BES_PARAMETERS])
5160 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5166 def CheckArguments(self):
5167 _CheckOutputFields(static=self._FIELDS_STATIC,
5168 dynamic=self._FIELDS_DYNAMIC,
5169 selected=self.op.output_fields)
5171 def ExpandNames(self):
5172 self.needed_locks = {}
5173 self.share_locks[locking.LEVEL_INSTANCE] = 1
5174 self.share_locks[locking.LEVEL_NODE] = 1
5177 self.wanted = _GetWantedInstances(self, self.op.names)
5179 self.wanted = locking.ALL_SET
5181 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5182 self.do_locking = self.do_node_query and self.op.use_locking
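# Live data (and therefore node locks) are only needed when at least one
# requested field falls outside the static set and the caller asked for
# locking; otherwise the query is served purely from the configuration.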
5184 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5185 self.needed_locks[locking.LEVEL_NODE] = []
5186 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5188 def DeclareLocks(self, level):
5189 if level == locking.LEVEL_NODE and self.do_locking:
5190 self._LockInstancesNodes()
5192 def Exec(self, feedback_fn):
5193 """Computes the list of nodes and their attributes.
5196 # pylint: disable-msg=R0912
5197 # way too many branches here
5198 all_info = self.cfg.GetAllInstancesInfo()
5199 if self.wanted == locking.ALL_SET:
5200 # caller didn't specify instance names, so ordering is not important
5202 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5204 instance_names = all_info.keys()
5205 instance_names = utils.NiceSort(instance_names)
5207 # caller did specify names, so we must keep the ordering
5209 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5211 tgt_set = all_info.keys()
5212 missing = set(self.wanted).difference(tgt_set)
5214 raise errors.OpExecError("Some instances were removed before"
5215 " retrieving their data: %s" % missing)
5216 instance_names = self.wanted
5218 instance_list = [all_info[iname] for iname in instance_names]
5220 # begin data gathering
5222 nodes = frozenset([inst.primary_node for inst in instance_list])
5223 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5227 if self.do_node_query:
5229 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5231 result = node_data[name]
5233 # offline nodes will be in both lists
5234 off_nodes.append(name)
5236 bad_nodes.append(name)
5239 live_data.update(result.payload)
5240 # else no instance is alive
5242 live_data = dict([(name, {}) for name in instance_names])
5244 # end data gathering
5249 cluster = self.cfg.GetClusterInfo()
5250 for instance in instance_list:
5252 i_hv = cluster.FillHV(instance, skip_globals=True)
5253 i_be = cluster.FillBE(instance)
5254 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5255 for field in self.op.output_fields:
5256 st_match = self._FIELDS_STATIC.Matches(field)
5257 if field in self._SIMPLE_FIELDS:
5258 val = getattr(instance, field)
5259 elif field == "pnode":
5260 val = instance.primary_node
5261 elif field == "snodes":
5262 val = list(instance.secondary_nodes)
5263 elif field == "admin_state":
5264 val = instance.admin_up
5265 elif field == "oper_state":
5266 if instance.primary_node in bad_nodes:
5269 val = bool(live_data.get(instance.name))
5270 elif field == "status":
5271 if instance.primary_node in off_nodes:
5272 val = "ERROR_nodeoffline"
5273 elif instance.primary_node in bad_nodes:
5274 val = "ERROR_nodedown"
5276 running = bool(live_data.get(instance.name))
5278 if instance.admin_up:
5283 if instance.admin_up:
5287 elif field == "oper_ram":
5288 if instance.primary_node in bad_nodes:
5290 elif instance.name in live_data:
5291 val = live_data[instance.name].get("memory", "?")
5294 elif field == "oper_vcpus":
5295 if instance.primary_node in bad_nodes:
5297 elif instance.name in live_data:
5298 val = live_data[instance.name].get("vcpus", "?")
5301 elif field == "vcpus":
5302 val = i_be[constants.BE_VCPUS]
5303 elif field == "disk_template":
5304 val = instance.disk_template
5307 val = instance.nics[0].ip
5310 elif field == "nic_mode":
5312 val = i_nicp[0][constants.NIC_MODE]
5315 elif field == "nic_link":
5317 val = i_nicp[0][constants.NIC_LINK]
5320 elif field == "bridge":
5321 if (instance.nics and
5322 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5323 val = i_nicp[0][constants.NIC_LINK]
5326 elif field == "mac":
5328 val = instance.nics[0].mac
5331 elif field == "sda_size" or field == "sdb_size":
5332 idx = ord(field[2]) - ord('a')
5334 val = instance.FindDisk(idx).size
5335 except errors.OpPrereqError:
5337 elif field == "disk_usage": # total disk usage per node
5338 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5339 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5340 elif field == "tags":
5341 val = list(instance.GetTags())
5342 elif field == "hvparams":
5344 elif (field.startswith(HVPREFIX) and
5345 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5346 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5347 val = i_hv.get(field[len(HVPREFIX):], None)
5348 elif field == "beparams":
5350 elif (field.startswith(BEPREFIX) and
5351 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5352 val = i_be.get(field[len(BEPREFIX):], None)
5353 elif st_match and st_match.groups():
5354 # matches a variable list
5355 st_groups = st_match.groups()
5356 if st_groups and st_groups[0] == "disk":
5357 if st_groups[1] == "count":
5358 val = len(instance.disks)
5359 elif st_groups[1] == "sizes":
5360 val = [disk.size for disk in instance.disks]
5361 elif st_groups[1] == "size":
5363 val = instance.FindDisk(st_groups[2]).size
5364 except errors.OpPrereqError:
5367 assert False, "Unhandled disk parameter"
5368 elif st_groups[0] == "nic":
5369 if st_groups[1] == "count":
5370 val = len(instance.nics)
5371 elif st_groups[1] == "macs":
5372 val = [nic.mac for nic in instance.nics]
5373 elif st_groups[1] == "ips":
5374 val = [nic.ip for nic in instance.nics]
5375 elif st_groups[1] == "modes":
5376 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5377 elif st_groups[1] == "links":
5378 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5379 elif st_groups[1] == "bridges":
5382 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5383 val.append(nicp[constants.NIC_LINK])
5388 nic_idx = int(st_groups[2])
5389 if nic_idx >= len(instance.nics):
5392 if st_groups[1] == "mac":
5393 val = instance.nics[nic_idx].mac
5394 elif st_groups[1] == "ip":
5395 val = instance.nics[nic_idx].ip
5396 elif st_groups[1] == "mode":
5397 val = i_nicp[nic_idx][constants.NIC_MODE]
5398 elif st_groups[1] == "link":
5399 val = i_nicp[nic_idx][constants.NIC_LINK]
5400 elif st_groups[1] == "bridge":
5401 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5402 if nic_mode == constants.NIC_MODE_BRIDGED:
5403 val = i_nicp[nic_idx][constants.NIC_LINK]
5407 assert False, "Unhandled NIC parameter"
5409 assert False, ("Declared but unhandled variable parameter '%s'" %
5412 assert False, "Declared but unhandled parameter '%s'" % field
5419 class LUFailoverInstance(LogicalUnit):
5420 """Failover an instance.
5423 HPATH = "instance-failover"
5424 HTYPE = constants.HTYPE_INSTANCE
5427 ("ignore_consistency", False, _TBool),
5432 def ExpandNames(self):
5433 self._ExpandAndLockInstance()
5434 self.needed_locks[locking.LEVEL_NODE] = []
5435 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5437 def DeclareLocks(self, level):
5438 if level == locking.LEVEL_NODE:
5439 self._LockInstancesNodes()
5441 def BuildHooksEnv(self):
5444 This runs on master, primary and secondary nodes of the instance.
5447 instance = self.instance
5448 source_node = instance.primary_node
5449 target_node = instance.secondary_nodes[0]
5451 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5452 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5453 "OLD_PRIMARY": source_node,
5454 "OLD_SECONDARY": target_node,
5455 "NEW_PRIMARY": target_node,
5456 "NEW_SECONDARY": source_node,
5458 env.update(_BuildInstanceHookEnvByObject(self, instance))
5459 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5461 nl_post.append(source_node)
5462 return env, nl, nl_post
5464 def CheckPrereq(self):
5465 """Check prerequisites.
5467 This checks that the instance is in the cluster.
5470 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5471 assert self.instance is not None, \
5472 "Cannot retrieve locked instance %s" % self.op.instance_name
5474 bep = self.cfg.GetClusterInfo().FillBE(instance)
5475 if instance.disk_template not in constants.DTS_NET_MIRROR:
5476 raise errors.OpPrereqError("Instance's disk layout is not"
5477 " network mirrored, cannot failover.",
5480 secondary_nodes = instance.secondary_nodes
5481 if not secondary_nodes:
5482 raise errors.ProgrammerError("no secondary node but using "
5483 "a mirrored disk template")
5485 target_node = secondary_nodes[0]
5486 _CheckNodeOnline(self, target_node)
5487 _CheckNodeNotDrained(self, target_node)
5488 if instance.admin_up:
5489 # check memory requirements on the secondary node
5490 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5491 instance.name, bep[constants.BE_MEMORY],
5492 instance.hypervisor)
5494 self.LogInfo("Not checking memory on the secondary node as"
5495 " instance will not be started")
5497 # check bridge existence
5498 _CheckInstanceBridgesExist(self, instance, node=target_node)
5500 def Exec(self, feedback_fn):
5501 """Failover an instance.
5503 The failover is done by shutting it down on its present node and
5504 starting it on the secondary.
5507 instance = self.instance
5509 source_node = instance.primary_node
5510 target_node = instance.secondary_nodes[0]
5512 if instance.admin_up:
5513 feedback_fn("* checking disk consistency between source and target")
5514 for dev in instance.disks:
5515 # for drbd, these are drbd over lvm
5516 if not _CheckDiskConsistency(self, dev, target_node, False):
5517 if not self.op.ignore_consistency:
5518 raise errors.OpExecError("Disk %s is degraded on target node,"
5519 " aborting failover." % dev.iv_name)
5521 feedback_fn("* not checking disk consistency as instance is not running")
5523 feedback_fn("* shutting down instance on source node")
5524 logging.info("Shutting down instance %s on node %s",
5525 instance.name, source_node)
5527 result = self.rpc.call_instance_shutdown(source_node, instance,
5528 self.op.shutdown_timeout)
5529 msg = result.fail_msg
5531 if self.op.ignore_consistency:
5532 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5533 " Proceeding anyway. Please make sure node"
5534 " %s is down. Error details: %s",
5535 instance.name, source_node, source_node, msg)
5537 raise errors.OpExecError("Could not shutdown instance %s on"
5539 (instance.name, source_node, msg))
5541 feedback_fn("* deactivating the instance's disks on source node")
5542 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5543 raise errors.OpExecError("Can't shut down the instance's disks.")
5545 instance.primary_node = target_node
5546 # distribute new instance config to the other nodes
5547 self.cfg.Update(instance, feedback_fn)
5549 # Only start the instance if it's marked as up
5550 if instance.admin_up:
5551 feedback_fn("* activating the instance's disks on target node")
5552 logging.info("Starting instance %s on node %s",
5553 instance.name, target_node)
5555 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5556 ignore_secondaries=True)
5558 _ShutdownInstanceDisks(self, instance)
5559 raise errors.OpExecError("Can't activate the instance's disks")
5561 feedback_fn("* starting the instance on the target node")
5562 result = self.rpc.call_instance_start(target_node, instance, None, None)
5563 msg = result.fail_msg
5565 _ShutdownInstanceDisks(self, instance)
5566 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5567 (instance.name, target_node, msg))
5570 class LUMigrateInstance(LogicalUnit):
5571 """Migrate an instance.
5573 This is migration without shutting down, compared to the failover,
5574 which is done with shutdown.
5577 HPATH = "instance-migrate"
5578 HTYPE = constants.HTYPE_INSTANCE
5583 ("cleanup", False, _TBool),
5588 def ExpandNames(self):
5589 self._ExpandAndLockInstance()
5591 self.needed_locks[locking.LEVEL_NODE] = []
5592 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5594 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5596 self.tasklets = [self._migrater]
5598 def DeclareLocks(self, level):
5599 if level == locking.LEVEL_NODE:
5600 self._LockInstancesNodes()
5602 def BuildHooksEnv(self):
5605 This runs on master, primary and secondary nodes of the instance.
5608 instance = self._migrater.instance
5609 source_node = instance.primary_node
5610 target_node = instance.secondary_nodes[0]
5611 env = _BuildInstanceHookEnvByObject(self, instance)
5612 env["MIGRATE_LIVE"] = self._migrater.live
5613 env["MIGRATE_CLEANUP"] = self.op.cleanup
5615 "OLD_PRIMARY": source_node,
5616 "OLD_SECONDARY": target_node,
5617 "NEW_PRIMARY": target_node,
5618 "NEW_SECONDARY": source_node,
5620 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5622 nl_post.append(source_node)
5623 return env, nl, nl_post
5626 class LUMoveInstance(LogicalUnit):
5627 """Move an instance by data-copying.
5630 HPATH = "instance-move"
5631 HTYPE = constants.HTYPE_INSTANCE
5634 ("target_node", _NoDefault, _TNonEmptyString),
5639 def ExpandNames(self):
5640 self._ExpandAndLockInstance()
5641 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5642 self.op.target_node = target_node
5643 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5644 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5646 def DeclareLocks(self, level):
5647 if level == locking.LEVEL_NODE:
5648 self._LockInstancesNodes(primary_only=True)
5650 def BuildHooksEnv(self):
5653 This runs on master, primary and secondary nodes of the instance.
5657 "TARGET_NODE": self.op.target_node,
5658 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5660 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5661 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5662 self.op.target_node]
5665 def CheckPrereq(self):
5666 """Check prerequisites.
5668 This checks that the instance is in the cluster.
5671 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5672 assert self.instance is not None, \
5673 "Cannot retrieve locked instance %s" % self.op.instance_name
5675 node = self.cfg.GetNodeInfo(self.op.target_node)
5676 assert node is not None, \
5677 "Cannot retrieve locked node %s" % self.op.target_node
5679 self.target_node = target_node = node.name
5681 if target_node == instance.primary_node:
5682 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5683 (instance.name, target_node),
5686 bep = self.cfg.GetClusterInfo().FillBE(instance)
5688 for idx, dsk in enumerate(instance.disks):
5689 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5690 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5691 " cannot copy" % idx, errors.ECODE_STATE)
5693 _CheckNodeOnline(self, target_node)
5694 _CheckNodeNotDrained(self, target_node)
5696 if instance.admin_up:
5697 # check memory requirements on the secondary node
5698 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5699 instance.name, bep[constants.BE_MEMORY],
5700 instance.hypervisor)
5702 self.LogInfo("Not checking memory on the secondary node as"
5703 " instance will not be started")
5705 # check bridge existence
5706 _CheckInstanceBridgesExist(self, instance, node=target_node)
5708 def Exec(self, feedback_fn):
5709 """Move an instance.
5711 The move is done by shutting it down on its present node, copying
5712 the data over (slow) and starting it on the new node.
5715 instance = self.instance
5717 source_node = instance.primary_node
5718 target_node = self.target_node
5720 self.LogInfo("Shutting down instance %s on source node %s",
5721 instance.name, source_node)
5723 result = self.rpc.call_instance_shutdown(source_node, instance,
5724 self.op.shutdown_timeout)
5725 msg = result.fail_msg
5727 if self.op.ignore_consistency:
5728 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5729 " Proceeding anyway. Please make sure node"
5730 " %s is down. Error details: %s",
5731 instance.name, source_node, source_node, msg)
5733 raise errors.OpExecError("Could not shutdown instance %s on"
5735 (instance.name, source_node, msg))
5737 # create the target disks
5739 _CreateDisks(self, instance, target_node=target_node)
5740 except errors.OpExecError:
5741 self.LogWarning("Device creation failed, reverting...")
5743 _RemoveDisks(self, instance, target_node=target_node)
5745 self.cfg.ReleaseDRBDMinors(instance.name)
5748 cluster_name = self.cfg.GetClusterInfo().cluster_name
5751 # activate, get path, copy the data over
5752 for idx, disk in enumerate(instance.disks):
5753 self.LogInfo("Copying data for disk %d", idx)
5754 result = self.rpc.call_blockdev_assemble(target_node, disk,
5755 instance.name, True)
5757 self.LogWarning("Can't assemble newly created disk %d: %s",
5758 idx, result.fail_msg)
5759 errs.append(result.fail_msg)
5761 dev_path = result.payload
5762 result = self.rpc.call_blockdev_export(source_node, disk,
5763 target_node, dev_path,
5766 self.LogWarning("Can't copy data over for disk %d: %s",
5767 idx, result.fail_msg)
5768 errs.append(result.fail_msg)
5772 self.LogWarning("Some disks failed to copy, aborting")
5774 _RemoveDisks(self, instance, target_node=target_node)
5776 self.cfg.ReleaseDRBDMinors(instance.name)
5777 raise errors.OpExecError("Errors during disk copy: %s" %
5780 instance.primary_node = target_node
5781 self.cfg.Update(instance, feedback_fn)
5783 self.LogInfo("Removing the disks on the original node")
5784 _RemoveDisks(self, instance, target_node=source_node)
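# Summary of the move performed above: new disks are created on the target,
# each disk is assembled there and filled via blockdev_export from the
# source, the config is switched to the new primary node, and only then are
# the source-node disks removed.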
5786 # Only start the instance if it's marked as up
5787 if instance.admin_up:
5788 self.LogInfo("Starting instance %s on node %s",
5789 instance.name, target_node)
5791 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5792 ignore_secondaries=True)
5794 _ShutdownInstanceDisks(self, instance)
5795 raise errors.OpExecError("Can't activate the instance's disks")
5797 result = self.rpc.call_instance_start(target_node, instance, None, None)
5798 msg = result.fail_msg
5800 _ShutdownInstanceDisks(self, instance)
5801 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5802 (instance.name, target_node, msg))
5805 class LUMigrateNode(LogicalUnit):
5806 """Migrate all instances from a node.
5809 HPATH = "node-migrate"
5810 HTYPE = constants.HTYPE_NODE
5818 def ExpandNames(self):
5819 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5821 self.needed_locks = {
5822 locking.LEVEL_NODE: [self.op.node_name],
5825 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5827 # Create tasklets for migrating instances for all instances on this node
5831 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5832 logging.debug("Migrating instance %s", inst.name)
5833 names.append(inst.name)
5835 tasklets.append(TLMigrateInstance(self, inst.name, False))
5837 self.tasklets = tasklets
5839 # Declare instance locks
5840 self.needed_locks[locking.LEVEL_INSTANCE] = names
5842 def DeclareLocks(self, level):
5843 if level == locking.LEVEL_NODE:
5844 self._LockInstancesNodes()
5846 def BuildHooksEnv(self):
5849 This runs on the master, the primary and all the secondaries.
5853 "NODE_NAME": self.op.node_name,
5856 nl = [self.cfg.GetMasterNode()]
5858 return (env, nl, nl)
5861 class TLMigrateInstance(Tasklet):
5862 """Tasklet class for instance migration.
5865 @ivar live: whether the migration will be done live or non-live;
5866 this variable is initialized only after CheckPrereq has run
5869 def __init__(self, lu, instance_name, cleanup):
5870 """Initializes this class.
5873 Tasklet.__init__(self, lu)
5876 self.instance_name = instance_name
5877 self.cleanup = cleanup
5878 self.live = False # will be overridden later
5880 def CheckPrereq(self):
5881 """Check prerequisites.
5883 This checks that the instance is in the cluster.
5886 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5887 instance = self.cfg.GetInstanceInfo(instance_name)
5888 assert instance is not None
5890 if instance.disk_template != constants.DT_DRBD8:
5891 raise errors.OpPrereqError("Instance's disk layout is not"
5892 " drbd8, cannot migrate.", errors.ECODE_STATE)
5894 secondary_nodes = instance.secondary_nodes
5895 if not secondary_nodes:
5896 raise errors.ConfigurationError("No secondary node but using"
5897 " drbd8 disk template")
5899 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5901 target_node = secondary_nodes[0]
5902 # check memory requirements on the secondary node
5903 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5904 instance.name, i_be[constants.BE_MEMORY],
5905 instance.hypervisor)
5907 # check bridge existence
5908 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5910 if not self.cleanup:
5911 _CheckNodeNotDrained(self.lu, target_node)
5912 result = self.rpc.call_instance_migratable(instance.primary_node,
5914 result.Raise("Can't migrate, please use failover",
5915 prereq=True, ecode=errors.ECODE_STATE)
5917 self.instance = instance
5919 if self.lu.op.live is not None and self.lu.op.mode is not None:
5920 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5921 " parameters are accepted",
5923 if self.lu.op.live is not None:
5925 self.lu.op.mode = constants.HT_MIGRATION_LIVE
5927 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5928 # reset the 'live' parameter to None so that repeated
5929 # invocations of CheckPrereq do not raise an exception
5930 self.lu.op.live = None
5931 elif self.lu.op.mode is None:
5932 # read the default value from the hypervisor
5933 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5934 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5936 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
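# Resolution of the 'live'/'mode' opcode parameters above, in brief
# (illustrative):
#   live=True,  mode=None   -> HT_MIGRATION_LIVE
#   live=False, mode=None   -> HT_MIGRATION_NONLIVE
#   live=None,  mode=given  -> the given mode is kept
#   live=None,  mode=None   -> the hypervisor default (HV_MIGRATION_MODE)
#   live and mode both set  -> OpPrereqError
# self.live ends up True exactly when the resulting mode is
# HT_MIGRATION_LIVE.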
5938 def _WaitUntilSync(self):
5939 """Poll with custom rpc for disk sync.
5941 This uses our own step-based rpc call.
5944 self.feedback_fn("* wait until resync is done")
5948 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5950 self.instance.disks)
5952 for node, nres in result.items():
5953 nres.Raise("Cannot resync disks on node %s" % node)
5954 node_done, node_percent = nres.payload
5955 all_done = all_done and node_done
5956 if node_percent is not None:
5957 min_percent = min(min_percent, node_percent)
5959 if min_percent < 100:
5960 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5963 def _EnsureSecondary(self, node):
5964 """Demote a node to secondary.
5967 self.feedback_fn("* switching node %s to secondary mode" % node)
5969 for dev in self.instance.disks:
5970 self.cfg.SetDiskID(dev, node)
5972 result = self.rpc.call_blockdev_close(node, self.instance.name,
5973 self.instance.disks)
5974 result.Raise("Cannot change disk to secondary on node %s" % node)
5976 def _GoStandalone(self):
5977 """Disconnect from the network.
5980 self.feedback_fn("* changing into standalone mode")
5981 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5982 self.instance.disks)
5983 for node, nres in result.items():
5984 nres.Raise("Cannot disconnect disks on node %s" % node)
5986 def _GoReconnect(self, multimaster):
5987 """Reconnect to the network.
5993 msg = "single-master"
5994 self.feedback_fn("* changing disks into %s mode" % msg)
5995 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5996 self.instance.disks,
5997 self.instance.name, multimaster)
5998 for node, nres in result.items():
5999 nres.Raise("Cannot change disks config on node %s" % node)
6001 def _ExecCleanup(self):
6002 """Try to cleanup after a failed migration.
6004 The cleanup is done by:
6005 - check that the instance is running only on one node
6006 (and update the config if needed)
6007 - change disks on its secondary node to secondary
6008 - wait until disks are fully synchronized
6009 - disconnect from the network
6010 - change disks into single-master mode
6011 - wait again until disks are fully synchronized
6014 instance = self.instance
6015 target_node = self.target_node
6016 source_node = self.source_node
6018 # check running on only one node
6019 self.feedback_fn("* checking where the instance actually runs"
6020 " (if this hangs, the hypervisor might be in"
6022 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6023 for node, result in ins_l.items():
6024 result.Raise("Can't contact node %s" % node)
6026 runningon_source = instance.name in ins_l[source_node].payload
6027 runningon_target = instance.name in ins_l[target_node].payload
6029 if runningon_source and runningon_target:
6030 raise errors.OpExecError("Instance seems to be running on two nodes,"
6031 " or the hypervisor is confused. You will have"
6032 " to ensure manually that it runs only on one"
6033 " and restart this operation.")
6035 if not (runningon_source or runningon_target):
6036 raise errors.OpExecError("Instance does not seem to be running at all."
6037 " In this case, it's safer to repair by"
6038 " running 'gnt-instance stop' to ensure disk"
6039 " shutdown, and then restarting it.")
6041 if runningon_target:
6042 # the migration has actually succeeded, we need to update the config
6043 self.feedback_fn("* instance running on secondary node (%s),"
6044 " updating config" % target_node)
6045 instance.primary_node = target_node
6046 self.cfg.Update(instance, self.feedback_fn)
6047 demoted_node = source_node
6049 self.feedback_fn("* instance confirmed to be running on its"
6050 " primary node (%s)" % source_node)
6051 demoted_node = target_node
6053 self._EnsureSecondary(demoted_node)
6055 self._WaitUntilSync()
6056 except errors.OpExecError:
6057 # we ignore errors here, since if the device is standalone, it
6058 # won't be able to sync
6060 self._GoStandalone()
6061 self._GoReconnect(False)
6062 self._WaitUntilSync()
6064 self.feedback_fn("* done")
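# Outcome of the "where does it actually run" check in _ExecCleanup above
# (illustrative summary):
#   running on the source only -> source stays primary, target is demoted
#   running on the target only -> primary_node is switched to the target,
#                                 source is demoted
#   running on both nodes      -> OpExecError, manual intervention needed
#   running on neither node    -> OpExecError, stop the instance and retry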
6066 def _RevertDiskStatus(self):
6067 """Try to revert the disk status after a failed migration.
6070 target_node = self.target_node
6072 self._EnsureSecondary(target_node)
6073 self._GoStandalone()
6074 self._GoReconnect(False)
6075 self._WaitUntilSync()
6076 except errors.OpExecError, err:
6077 self.lu.LogWarning("Migration failed and I can't reconnect the"
6078 " drives: error '%s'\n"
6079 "Please look and recover the instance status" %
6082 def _AbortMigration(self):
6083 """Call the hypervisor code to abort a started migration.
6086 instance = self.instance
6087 target_node = self.target_node
6088 migration_info = self.migration_info
6090 abort_result = self.rpc.call_finalize_migration(target_node,
6094 abort_msg = abort_result.fail_msg
6096 logging.error("Aborting migration failed on target node %s: %s",
6097 target_node, abort_msg)
6098 # Don't raise an exception here, as we still have to try to revert the
6099 # disk status, even if this step failed.
6101 def _ExecMigration(self):
6102 """Migrate an instance.
6104 The migration is done by:
6105 - change the disks into dual-master mode
6106 - wait until disks are fully synchronized again
6107 - migrate the instance
6108 - change disks on the new secondary node (the old primary) to secondary
6109 - wait until disks are fully synchronized
6110 - change disks into single-master mode
6113 instance = self.instance
6114 target_node = self.target_node
6115 source_node = self.source_node
6117 self.feedback_fn("* checking disk consistency between source and target")
6118 for dev in instance.disks:
6119 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6120 raise errors.OpExecError("Disk %s is degraded or not fully"
6121 " synchronized on target node,"
6122 " aborting migrate." % dev.iv_name)
6124 # First get the migration information from the remote node
6125 result = self.rpc.call_migration_info(source_node, instance)
6126 msg = result.fail_msg
6128 log_err = ("Failed fetching source migration information from %s: %s" %
6130 logging.error(log_err)
6131 raise errors.OpExecError(log_err)
6133 self.migration_info = migration_info = result.payload
6135 # Then switch the disks to master/master mode
6136 self._EnsureSecondary(target_node)
6137 self._GoStandalone()
6138 self._GoReconnect(True)
6139 self._WaitUntilSync()
6141 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6142 result = self.rpc.call_accept_instance(target_node,
6145 self.nodes_ip[target_node])
6147 msg = result.fail_msg
6149 logging.error("Instance pre-migration failed, trying to revert"
6150 " disk status: %s", msg)
6151 self.feedback_fn("Pre-migration failed, aborting")
6152 self._AbortMigration()
6153 self._RevertDiskStatus()
6154 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6155 (instance.name, msg))
6157 self.feedback_fn("* migrating instance to %s" % target_node)
6159 result = self.rpc.call_instance_migrate(source_node, instance,
6160 self.nodes_ip[target_node],
6162 msg = result.fail_msg
6164 logging.error("Instance migration failed, trying to revert"
6165 " disk status: %s", msg)
6166 self.feedback_fn("Migration failed, aborting")
6167 self._AbortMigration()
6168 self._RevertDiskStatus()
6169 raise errors.OpExecError("Could not migrate instance %s: %s" %
6170 (instance.name, msg))
6173 instance.primary_node = target_node
6174 # distribute new instance config to the other nodes
6175 self.cfg.Update(instance, self.feedback_fn)
6177 result = self.rpc.call_finalize_migration(target_node,
6181 msg = result.fail_msg
6183 logging.error("Instance migration succeeded, but finalization failed:"
6185 raise errors.OpExecError("Could not finalize instance migration: %s" %
6188 self._EnsureSecondary(source_node)
6189 self._WaitUntilSync()
6190 self._GoStandalone()
6191 self._GoReconnect(False)
6192 self._WaitUntilSync()
6194 self.feedback_fn("* done")
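# Successful call sequence of _ExecMigration above (illustrative):
#   call_migration_info(source) -> _EnsureSecondary(target) ->
#   _GoStandalone() -> _GoReconnect(True) -> _WaitUntilSync() ->
#   call_accept_instance(target) -> call_instance_migrate(source) ->
#   call_finalize_migration(target) -> _EnsureSecondary(source) ->
#   _WaitUntilSync() -> _GoStandalone() -> _GoReconnect(False) ->
#   _WaitUntilSync()
# A failure while accepting or migrating the instance triggers
# _AbortMigration() followed by _RevertDiskStatus().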
6196 def Exec(self, feedback_fn):
6197 """Perform the migration.
6200 feedback_fn("Migrating instance %s" % self.instance.name)
6202 self.feedback_fn = feedback_fn
6204 self.source_node = self.instance.primary_node
6205 self.target_node = self.instance.secondary_nodes[0]
6206 self.all_nodes = [self.source_node, self.target_node]
6208 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6209 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6213 return self._ExecCleanup()
6215 return self._ExecMigration()
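# Illustrative usage sketch (hypothetical, for clarity only): a
# TLMigrateInstance tasklet is driven by its owning logical unit, which runs
# the prerequisite check before executing it, roughly:
#
#   tl = TLMigrateInstance(lu, "instance1.example.com", cleanup=False)
#   tl.CheckPrereq()   # validates DRBD8 layout, target memory, bridges, ...
#   tl.Exec(feedback_fn)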
6218 def _CreateBlockDev(lu, node, instance, device, force_create,
6220 """Create a tree of block devices on a given node.
6222 If this device type has to be created on secondaries, create it and
6225 If not, just recurse into its children, keeping the same 'force' value.
6227 @param lu: the lu on whose behalf we execute
6228 @param node: the node on which to create the device
6229 @type instance: L{objects.Instance}
6230 @param instance: the instance which owns the device
6231 @type device: L{objects.Disk}
6232 @param device: the device to create
6233 @type force_create: boolean
6234 @param force_create: whether to force creation of this device; this
6235 will be changed to True whenever we find a device which has
6236 the CreateOnSecondary() attribute
6237 @param info: the extra 'metadata' we should attach to the device
6238 (this will be represented as a LVM tag)
6239 @type force_open: boolean
6240 @param force_open: this parameter will be passed to the
6241 L{backend.BlockdevCreate} function where it specifies
6242 whether we run on primary or not, and it affects both
6243 the child assembly and the device's own Open() execution
6246 if device.CreateOnSecondary():
6250 for child in device.children:
6251 _CreateBlockDev(lu, node, instance, child, force_create,
6254 if not force_create:
6257 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6260 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6261 """Create a single block device on a given node.
6263 This will not recurse over children of the device, so they must be
6266 @param lu: the lu on whose behalf we execute
6267 @param node: the node on which to create the device
6268 @type instance: L{objects.Instance}
6269 @param instance: the instance which owns the device
6270 @type device: L{objects.Disk}
6271 @param device: the device to create
6272 @param info: the extra 'metadata' we should attach to the device
6273 (this will be represented as a LVM tag)
6274 @type force_open: boolean
6275 @param force_open: this parameter will be passed to the
6276 L{backend.BlockdevCreate} function where it specifies
6277 whether we run on primary or not, and it affects both
6278 the child assembly and the device's own Open() execution
6281 lu.cfg.SetDiskID(device, node)
6282 result = lu.rpc.call_blockdev_create(node, device, device.size,
6283 instance.name, force_open, info)
6284 result.Raise("Can't create block device %s on"
6285 " node %s for instance %s" % (device, node, instance.name))
6286 if device.physical_id is None:
6287 device.physical_id = result.payload
6290 def _GenerateUniqueNames(lu, exts):
6291 """Generate a suitable LV name.
6293 This will generate unique logical volume names, one per given extension.
6298 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6299 results.append("%s%s" % (new_id, val))
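# Example (illustrative): with exts == [".disk0_data", ".disk0_meta"] this
# returns names of the form ["<uuid>.disk0_data", "<uuid>.disk0_meta"], a
# fresh unique ID being reserved for each extension.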
6303 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6305 """Generate a drbd8 device complete with its children.
6308 port = lu.cfg.AllocatePort()
6309 vgname = lu.cfg.GetVGName()
6310 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6311 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6312 logical_id=(vgname, names[0]))
6313 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6314 logical_id=(vgname, names[1]))
6315 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6316 logical_id=(primary, secondary, port,
6319 children=[dev_data, dev_meta],
6324 def _GenerateDiskTemplate(lu, template_name,
6325 instance_name, primary_node,
6326 secondary_nodes, disk_info,
6327 file_storage_dir, file_driver,
6329 """Generate the entire disk layout for a given template type.
6332 # TODO: compute space requirements
6334 vgname = lu.cfg.GetVGName()
6335 disk_count = len(disk_info)
6337 if template_name == constants.DT_DISKLESS:
6339 elif template_name == constants.DT_PLAIN:
6340 if len(secondary_nodes) != 0:
6341 raise errors.ProgrammerError("Wrong template configuration")
6343 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6344 for i in range(disk_count)])
6345 for idx, disk in enumerate(disk_info):
6346 disk_index = idx + base_index
6347 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6348 logical_id=(vgname, names[idx]),
6349 iv_name="disk/%d" % disk_index,
6351 disks.append(disk_dev)
6352 elif template_name == constants.DT_DRBD8:
6353 if len(secondary_nodes) != 1:
6354 raise errors.ProgrammerError("Wrong template configuration")
6355 remote_node = secondary_nodes[0]
6356 minors = lu.cfg.AllocateDRBDMinor(
6357 [primary_node, remote_node] * len(disk_info), instance_name)
6360 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6361 for i in range(disk_count)]):
6362 names.append(lv_prefix + "_data")
6363 names.append(lv_prefix + "_meta")
6364 for idx, disk in enumerate(disk_info):
6365 disk_index = idx + base_index
6366 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6367 disk["size"], names[idx*2:idx*2+2],
6368 "disk/%d" % disk_index,
6369 minors[idx*2], minors[idx*2+1])
6370 disk_dev.mode = disk["mode"]
6371 disks.append(disk_dev)
6372 elif template_name == constants.DT_FILE:
6373 if len(secondary_nodes) != 0:
6374 raise errors.ProgrammerError("Wrong template configuration")
6376 _RequireFileStorage()
6378 for idx, disk in enumerate(disk_info):
6379 disk_index = idx + base_index
6380 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6381 iv_name="disk/%d" % disk_index,
6382 logical_id=(file_driver,
6383 "%s/disk%d" % (file_storage_dir,
6386 disks.append(disk_dev)
6388 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
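# Illustrative example for DT_DRBD8 with two disks: the name generator above
# yields one prefix per disk (e.g. "<uuid>.disk0", "<uuid>.disk1"), each
# expanded into a "_data" and a "_meta" LV, and AllocateDRBDMinor reserves
# two minors per disk, one on the primary and one on the secondary node.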
6392 def _GetInstanceInfoText(instance):
6393 """Compute that text that should be added to the disk's metadata.
6396 return "originstname+%s" % instance.name
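# Example (illustrative): for an instance named "web1.example.com" the text
# attached to its disks as an LVM tag is "originstname+web1.example.com".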
6399 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6400 """Create all disks for an instance.
6402 This abstracts away some work from AddInstance.
6404 @type lu: L{LogicalUnit}
6405 @param lu: the logical unit on whose behalf we execute
6406 @type instance: L{objects.Instance}
6407 @param instance: the instance whose disks we should create
6409 @param to_skip: list of indices to skip
6410 @type target_node: string
6411 @param target_node: if passed, overrides the target node for creation
6413 @return: the success of the creation
6416 info = _GetInstanceInfoText(instance)
6417 if target_node is None:
6418 pnode = instance.primary_node
6419 all_nodes = instance.all_nodes
6424 if instance.disk_template == constants.DT_FILE:
6425 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6426 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6428 result.Raise("Failed to create directory '%s' on"
6429 " node %s" % (file_storage_dir, pnode))
6431 # Note: this needs to be kept in sync with adding of disks in
6432 # LUSetInstanceParams
6433 for idx, device in enumerate(instance.disks):
6434 if to_skip and idx in to_skip:
6436 logging.info("Creating volume %s for instance %s",
6437 device.iv_name, instance.name)
6439 for node in all_nodes:
6440 f_create = node == pnode
6441 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
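# Note on the loop above: f_create is True only on the primary node, so both
# force_create and force_open start out True there and False everywhere else;
# _CreateBlockDev then raises force_create back to True on secondaries only
# for device types that report CreateOnSecondary().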
6444 def _RemoveDisks(lu, instance, target_node=None):
6445 """Remove all disks for an instance.
6447 This abstracts away some work from `AddInstance()` and
6448 `RemoveInstance()`. Note that in case some of the devices couldn't
6449 be removed, the removal will continue with the other ones (compare
6450 with `_CreateDisks()`).
6452 @type lu: L{LogicalUnit}
6453 @param lu: the logical unit on whose behalf we execute
6454 @type instance: L{objects.Instance}
6455 @param instance: the instance whose disks we should remove
6456 @type target_node: string
6457 @param target_node: used to override the node on which to remove the disks
6459 @return: the success of the removal
6462 logging.info("Removing block devices for instance %s", instance.name)
6465 for device in instance.disks:
6467 edata = [(target_node, device)]
6469 edata = device.ComputeNodeTree(instance.primary_node)
6470 for node, disk in edata:
6471 lu.cfg.SetDiskID(disk, node)
6472 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6474 lu.LogWarning("Could not remove block device %s on node %s,"
6475 " continuing anyway: %s", device.iv_name, node, msg)
6478 if instance.disk_template == constants.DT_FILE:
6479 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6483 tgt = instance.primary_node
6484 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6486 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6487 file_storage_dir, instance.primary_node, result.fail_msg)
6493 def _ComputeDiskSize(disk_template, disks):
6494 """Compute disk size requirements in the volume group
6497 # Required free disk space as a function of disk and swap space
6499 constants.DT_DISKLESS: None,
6500 constants.DT_PLAIN: sum(d["size"] for d in disks),
6501 # 128 MB are added for drbd metadata for each disk
6502 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6503 constants.DT_FILE: None,
6506 if disk_template not in req_size_dict:
6507 raise errors.ProgrammerError("Disk template '%s' size requirement"
6508 " is unknown" % disk_template)
6510 return req_size_dict[disk_template]
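# Worked example (illustrative): for two disks of 1024 and 512 MiB,
#   DT_PLAIN    -> 1024 + 512 = 1536 MiB of free volume group space
#   DT_DRBD8    -> (1024 + 128) + (512 + 128) = 1792 MiB, the extra 128 MiB
#                  per disk being the DRBD metadata volume
#   DT_DISKLESS and DT_FILE -> None, no volume group space is needed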
6513 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6514 """Hypervisor parameter validation.
6516 This function abstracts the hypervisor parameter validation to be
6517 used in both instance create and instance modify.
6519 @type lu: L{LogicalUnit}
6520 @param lu: the logical unit for which we check
6521 @type nodenames: list
6522 @param nodenames: the list of nodes on which we should check
6523 @type hvname: string
6524 @param hvname: the name of the hypervisor we should use
6525 @type hvparams: dict
6526 @param hvparams: the parameters which we need to check
6527 @raise errors.OpPrereqError: if the parameters are not valid
6530 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6533 for node in nodenames:
6537 info.Raise("Hypervisor parameter validation failed on node %s" % node)
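# Typical call, as used by instance creation further below (illustrative):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# where nodenames covers the primary node plus any secondaries.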
6540 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6541 """OS parameters validation.
6543 @type lu: L{LogicalUnit}
6544 @param lu: the logical unit for which we check
6545 @type required: boolean
6546 @param required: whether the validation should fail if the OS is not
6548 @type nodenames: list
6549 @param nodenames: the list of nodes on which we should check
6550 @type osname: string
6551 @param osname: the name of the OS we should use
6552 @type osparams: dict
6553 @param osparams: the parameters which we need to check
6554 @raise errors.OpPrereqError: if the parameters are not valid
6557 result = lu.rpc.call_os_validate(required, nodenames, osname,
6558 [constants.OS_VALIDATE_PARAMETERS],
6560 for node, nres in result.items():
6561 # we don't check for offline cases since this should be run only
6562 # against the master node and/or an instance's nodes
6563 nres.Raise("OS Parameters validation failed on node %s" % node)
6564 if not nres.payload:
6565 lu.LogInfo("OS %s not found on node %s, validation skipped",
6569 class LUCreateInstance(LogicalUnit):
6570 """Create an instance.
6573 HPATH = "instance-add"
6574 HTYPE = constants.HTYPE_INSTANCE
6577 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6578 ("start", True, _TBool),
6579 ("wait_for_sync", True, _TBool),
6580 ("ip_check", True, _TBool),
6581 ("name_check", True, _TBool),
6582 ("disks", _NoDefault, _TListOf(_TDict)),
6583 ("nics", _NoDefault, _TListOf(_TDict)),
6584 ("hvparams", _EmptyDict, _TDict),
6585 ("beparams", _EmptyDict, _TDict),
6586 ("osparams", _EmptyDict, _TDict),
6587 ("no_install", None, _TMaybeBool),
6588 ("os_type", None, _TMaybeString),
6589 ("force_variant", False, _TBool),
6590 ("source_handshake", None, _TOr(_TList, _TNone)),
6591 ("source_x509_ca", None, _TMaybeString),
6592 ("source_instance_name", None, _TMaybeString),
6593 ("src_node", None, _TMaybeString),
6594 ("src_path", None, _TMaybeString),
6595 ("pnode", None, _TMaybeString),
6596 ("snode", None, _TMaybeString),
6597 ("iallocator", None, _TMaybeString),
6598 ("hypervisor", None, _TMaybeString),
6599 ("disk_template", _NoDefault, _CheckDiskTemplate),
6600 ("identify_defaults", False, _TBool),
6601 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6602 ("file_storage_dir", None, _TMaybeString),
6606 def CheckArguments(self):
6610 # do not require name_check to ease forward/backward compatibility
6612 if self.op.no_install and self.op.start:
6613 self.LogInfo("No-installation mode selected, disabling startup")
6614 self.op.start = False
6615 # validate/normalize the instance name
6616 self.op.instance_name = \
6617 netutils.HostInfo.NormalizeName(self.op.instance_name)
6619 if self.op.ip_check and not self.op.name_check:
6620 # TODO: make the ip check more flexible and not depend on the name check
6621 raise errors.OpPrereqError("Cannot do ip check without a name check",
6624 # check nics' parameter names
6625 for nic in self.op.nics:
6626 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6628 # check disks: parameter names and consistent adopt/no-adopt strategy
6629 has_adopt = has_no_adopt = False
6630 for disk in self.op.disks:
6631 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6636 if has_adopt and has_no_adopt:
6637 raise errors.OpPrereqError("Either all disks are adopted or none is",
6640 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6641 raise errors.OpPrereqError("Disk adoption is not supported for the"
6642 " '%s' disk template" %
6643 self.op.disk_template,
6645 if self.op.iallocator is not None:
6646 raise errors.OpPrereqError("Disk adoption not allowed with an"
6647 " iallocator script", errors.ECODE_INVAL)
6648 if self.op.mode == constants.INSTANCE_IMPORT:
6649 raise errors.OpPrereqError("Disk adoption not allowed for"
6650 " instance import", errors.ECODE_INVAL)
6652 self.adopt_disks = has_adopt
6654 # instance name verification
6655 if self.op.name_check:
6656 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6657 self.op.instance_name = self.hostname1.name
6658 # used in CheckPrereq for ip ping check
6659 self.check_ip = self.hostname1.ip
6660 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6661 raise errors.OpPrereqError("Remote imports require names to be checked" %
6664 self.check_ip = None
6666 # file storage checks
6667 if (self.op.file_driver and
6668 not self.op.file_driver in constants.FILE_DRIVER):
6669 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6670 self.op.file_driver, errors.ECODE_INVAL)
6672 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6673 raise errors.OpPrereqError("File storage directory path not absolute",
6676 ### Node/iallocator related checks
6677 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6679 if self.op.pnode is not None:
6680 if self.op.disk_template in constants.DTS_NET_MIRROR:
6681 if self.op.snode is None:
6682 raise errors.OpPrereqError("The networked disk templates need"
6683 " a mirror node", errors.ECODE_INVAL)
6685 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6687 self.op.snode = None
6689 self._cds = _GetClusterDomainSecret()
6691 if self.op.mode == constants.INSTANCE_IMPORT:
6692 # On import force_variant must be True, because if we forced it at
6693 # initial install, our only chance when importing it back is that it
6695 self.op.force_variant = True
6697 if self.op.no_install:
6698 self.LogInfo("No-installation mode has no effect during import")
6700 elif self.op.mode == constants.INSTANCE_CREATE:
6701 if self.op.os_type is None:
6702 raise errors.OpPrereqError("No guest OS specified",
6704 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6705 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6706 " installation" % self.op.os_type,
6708 if self.op.disk_template is None:
6709 raise errors.OpPrereqError("No disk template specified",
6712 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6713 # Check handshake to ensure both clusters have the same domain secret
6714 src_handshake = self.op.source_handshake
6715 if not src_handshake:
6716 raise errors.OpPrereqError("Missing source handshake",
6719 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6722 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6725 # Load and check source CA
6726 self.source_x509_ca_pem = self.op.source_x509_ca
6727 if not self.source_x509_ca_pem:
6728 raise errors.OpPrereqError("Missing source X509 CA",
6732 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6734 except OpenSSL.crypto.Error, err:
6735 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6736 (err, ), errors.ECODE_INVAL)
6738 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6739 if errcode is not None:
6740 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6743 self.source_x509_ca = cert
6745 src_instance_name = self.op.source_instance_name
6746 if not src_instance_name:
6747 raise errors.OpPrereqError("Missing source instance name",
6750 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6751 self.source_instance_name = netutils.GetHostInfo(norm_name).name
6754 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6755 self.op.mode, errors.ECODE_INVAL)
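# Per-mode argument requirements checked above (illustrative summary):
#   INSTANCE_CREATE        -> os_type and disk_template are mandatory
#   INSTANCE_IMPORT        -> force_variant is forced to True, no_install
#                             is ignored
#   INSTANCE_REMOTE_IMPORT -> source_handshake, source_x509_ca and
#                             source_instance_name are mandatory
#   any other mode         -> OpPrereqError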
6757 def ExpandNames(self):
6758 """ExpandNames for CreateInstance.
6760 Figure out the right locks for instance creation.
6763 self.needed_locks = {}
6765 instance_name = self.op.instance_name
6766 # this is just a preventive check, but someone might still add this
6767 # instance in the meantime, and creation will fail at lock-add time
6768 if instance_name in self.cfg.GetInstanceList():
6769 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6770 instance_name, errors.ECODE_EXISTS)
6772 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6774 if self.op.iallocator:
6775 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6777 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6778 nodelist = [self.op.pnode]
6779 if self.op.snode is not None:
6780 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6781 nodelist.append(self.op.snode)
6782 self.needed_locks[locking.LEVEL_NODE] = nodelist
6784 # in case of import, lock the source node too
6785 if self.op.mode == constants.INSTANCE_IMPORT:
6786 src_node = self.op.src_node
6787 src_path = self.op.src_path
6789 if src_path is None:
6790 self.op.src_path = src_path = self.op.instance_name
6792 if src_node is None:
6793 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6794 self.op.src_node = None
6795 if os.path.isabs(src_path):
6796 raise errors.OpPrereqError("Importing an instance from an absolute"
6797 " path requires a source node option.",
6800 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6801 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6802 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6803 if not os.path.isabs(src_path):
6804 self.op.src_path = src_path = \
6805 utils.PathJoin(constants.EXPORT_DIR, src_path)
6807 def _RunAllocator(self):
6808 """Run the allocator based on input opcode.
6811 nics = [n.ToDict() for n in self.nics]
6812 ial = IAllocator(self.cfg, self.rpc,
6813 mode=constants.IALLOCATOR_MODE_ALLOC,
6814 name=self.op.instance_name,
6815 disk_template=self.op.disk_template,
6818 vcpus=self.be_full[constants.BE_VCPUS],
6819 mem_size=self.be_full[constants.BE_MEMORY],
6822 hypervisor=self.op.hypervisor,
6825 ial.Run(self.op.iallocator)
6828 raise errors.OpPrereqError("Can't compute nodes using"
6829 " iallocator '%s': %s" %
6830 (self.op.iallocator, ial.info),
6832 if len(ial.result) != ial.required_nodes:
6833 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6834 " of nodes (%s), required %s" %
6835 (self.op.iallocator, len(ial.result),
6836 ial.required_nodes), errors.ECODE_FAULT)
6837 self.op.pnode = ial.result[0]
6838 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6839 self.op.instance_name, self.op.iallocator,
6840 utils.CommaJoin(ial.result))
6841 if ial.required_nodes == 2:
6842 self.op.snode = ial.result[1]
6844 def BuildHooksEnv(self):
6847 This runs on master, primary and secondary nodes of the instance.
6851 "ADD_MODE": self.op.mode,
6853 if self.op.mode == constants.INSTANCE_IMPORT:
6854 env["SRC_NODE"] = self.op.src_node
6855 env["SRC_PATH"] = self.op.src_path
6856 env["SRC_IMAGES"] = self.src_images
6858 env.update(_BuildInstanceHookEnv(
6859 name=self.op.instance_name,
6860 primary_node=self.op.pnode,
6861 secondary_nodes=self.secondaries,
6862 status=self.op.start,
6863 os_type=self.op.os_type,
6864 memory=self.be_full[constants.BE_MEMORY],
6865 vcpus=self.be_full[constants.BE_VCPUS],
6866 nics=_NICListToTuple(self, self.nics),
6867 disk_template=self.op.disk_template,
6868 disks=[(d["size"], d["mode"]) for d in self.disks],
6871 hypervisor_name=self.op.hypervisor,
6874 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6878 def _ReadExportInfo(self):
6879 """Reads the export information from disk.
6881 It will override the opcode source node and path with the actual
6882 information, if these two were not specified before.
6884 @return: the export information
6887 assert self.op.mode == constants.INSTANCE_IMPORT
6889 src_node = self.op.src_node
6890 src_path = self.op.src_path
6892 if src_node is None:
6893 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6894 exp_list = self.rpc.call_export_list(locked_nodes)
6896 for node in exp_list:
6897 if exp_list[node].fail_msg:
6899 if src_path in exp_list[node].payload:
6901 self.op.src_node = src_node = node
6902 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6906 raise errors.OpPrereqError("No export found for relative path %s" %
6907 src_path, errors.ECODE_INVAL)
6909 _CheckNodeOnline(self, src_node)
6910 result = self.rpc.call_export_info(src_node, src_path)
6911 result.Raise("No export or invalid export found in dir %s" % src_path)
6913 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6914 if not export_info.has_section(constants.INISECT_EXP):
6915 raise errors.ProgrammerError("Corrupted export config",
6916 errors.ECODE_ENVIRON)
6918 ei_version = export_info.get(constants.INISECT_EXP, "version")
6919 if (int(ei_version) != constants.EXPORT_VERSION):
6920 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6921 (ei_version, constants.EXPORT_VERSION),
6922 errors.ECODE_ENVIRON)
6925 def _ReadExportParams(self, einfo):
6926 """Use export parameters as defaults.
6928 In case the opcode doesn't specify (i.e. override) some instance
6929 parameters, try to use them from the export information, if
6933 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6935 if self.op.disk_template is None:
6936 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6937 self.op.disk_template = einfo.get(constants.INISECT_INS,
6940 raise errors.OpPrereqError("No disk template specified and the export"
6941 " is missing the disk_template information",
6944 if not self.op.disks:
6945 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6947 # TODO: import the disk iv_name too
6948 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6949 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6950 disks.append({"size": disk_sz})
6951 self.op.disks = disks
6953 raise errors.OpPrereqError("No disk info specified and the export"
6954 " is missing the disk information",
6957 if (not self.op.nics and
6958 einfo.has_option(constants.INISECT_INS, "nic_count")):
6960 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6962 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6963 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6968 if (self.op.hypervisor is None and
6969 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6970 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6971 if einfo.has_section(constants.INISECT_HYP):
6972 # use the export parameters but do not override the ones
6973 # specified by the user
6974 for name, value in einfo.items(constants.INISECT_HYP):
6975 if name not in self.op.hvparams:
6976 self.op.hvparams[name] = value
6978 if einfo.has_section(constants.INISECT_BEP):
6979 # use the parameters, without overriding
6980 for name, value in einfo.items(constants.INISECT_BEP):
6981 if name not in self.op.beparams:
6982 self.op.beparams[name] = value
6984 # try to read the parameters old style, from the main section
6985 for name in constants.BES_PARAMETERS:
6986 if (name not in self.op.beparams and
6987 einfo.has_option(constants.INISECT_INS, name)):
6988 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6990 if einfo.has_section(constants.INISECT_OSP):
6991 # use the parameters, without overriding
6992 for name, value in einfo.items(constants.INISECT_OSP):
6993 if name not in self.op.osparams:
6994 self.op.osparams[name] = value
6996 def _RevertToDefaults(self, cluster):
6997 """Revert the instance parameters to the default values.
7001 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7002 for name in self.op.hvparams.keys():
7003 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7004 del self.op.hvparams[name]
7006 be_defs = cluster.SimpleFillBE({})
7007 for name in self.op.beparams.keys():
7008 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7009 del self.op.beparams[name]
7011 nic_defs = cluster.SimpleFillNIC({})
7012 for nic in self.op.nics:
7013 for name in constants.NICS_PARAMETERS:
7014 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7017 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7018 for name in self.op.osparams.keys():
7019 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7020 del self.op.osparams[name]
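# Example (illustrative): if the opcode carries a backend parameter equal to
# the cluster default (say memory=128 while the cluster default is already
# 128), the entry is dropped above, so the instance keeps following the
# cluster default instead of pinning the current value.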
7022 def CheckPrereq(self):
7023 """Check prerequisites.
7026 if self.op.mode == constants.INSTANCE_IMPORT:
7027 export_info = self._ReadExportInfo()
7028 self._ReadExportParams(export_info)
7030 _CheckDiskTemplate(self.op.disk_template)
7032 if (not self.cfg.GetVGName() and
7033 self.op.disk_template not in constants.DTS_NOT_LVM):
7034 raise errors.OpPrereqError("Cluster does not support lvm-based"
7035 " instances", errors.ECODE_STATE)
7037 if self.op.hypervisor is None:
7038 self.op.hypervisor = self.cfg.GetHypervisorType()
7040 cluster = self.cfg.GetClusterInfo()
7041 enabled_hvs = cluster.enabled_hypervisors
7042 if self.op.hypervisor not in enabled_hvs:
7043 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7044 " cluster (%s)" % (self.op.hypervisor,
7045 ",".join(enabled_hvs)),
7048 # check hypervisor parameter syntax (locally)
7049 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7050 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7052 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7053 hv_type.CheckParameterSyntax(filled_hvp)
7054 self.hv_full = filled_hvp
7055 # check that we don't specify global parameters on an instance
7056 _CheckGlobalHvParams(self.op.hvparams)
7058 # fill and remember the beparams dict
7059 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7060 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7062 # build os parameters
7063 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7065 # now that hvp/bep are in final format, let's reset to defaults,
7067 if self.op.identify_defaults:
7068 self._RevertToDefaults(cluster)
7072 for idx, nic in enumerate(self.op.nics):
7073 nic_mode_req = nic.get("mode", None)
7074 nic_mode = nic_mode_req
7075 if nic_mode is None:
7076 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7078 # in routed mode, for the first nic, the default ip is 'auto'
7079 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7080 default_ip_mode = constants.VALUE_AUTO
7082 default_ip_mode = constants.VALUE_NONE
7084 # ip validity checks
7085 ip = nic.get("ip", default_ip_mode)
7086 if ip is None or ip.lower() == constants.VALUE_NONE:
7088 elif ip.lower() == constants.VALUE_AUTO:
7089 if not self.op.name_check:
7090 raise errors.OpPrereqError("IP address set to auto but name checks"
7091 " have been skipped. Aborting.",
7093 nic_ip = self.hostname1.ip
7095 if not netutils.IsValidIP4(ip):
7096 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7097 " like a valid IP" % ip,
7101 # TODO: check the ip address for uniqueness
7102 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7103 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7106 # MAC address verification
7107 mac = nic.get("mac", constants.VALUE_AUTO)
7108 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7109 mac = utils.NormalizeAndValidateMac(mac)
7112 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7113 except errors.ReservationError:
7114 raise errors.OpPrereqError("MAC address %s already in use"
7115 " in cluster" % mac,
7116 errors.ECODE_NOTUNIQUE)
7118 # bridge verification
7119 bridge = nic.get("bridge", None)
7120 link = nic.get("link", None)
7122 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7123 " at the same time", errors.ECODE_INVAL)
7124 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7125 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7132 nicparams[constants.NIC_MODE] = nic_mode_req
7134 nicparams[constants.NIC_LINK] = link
7136 check_params = cluster.SimpleFillNIC(nicparams)
7137 objects.NIC.CheckParameterSyntax(check_params)
7138 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7140 # disk checks/pre-build
7142 for disk in self.op.disks:
7143 mode = disk.get("mode", constants.DISK_RDWR)
7144 if mode not in constants.DISK_ACCESS_SET:
7145 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7146 mode, errors.ECODE_INVAL)
7147 size = disk.get("size", None)
7149 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7152 except (TypeError, ValueError):
7153 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7155 new_disk = {"size": size, "mode": mode}
7157 new_disk["adopt"] = disk["adopt"]
7158 self.disks.append(new_disk)
7160 if self.op.mode == constants.INSTANCE_IMPORT:
7162 # Check that the new instance doesn't have less disks than the export
7163 instance_disks = len(self.disks)
7164 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7165 if instance_disks < export_disks:
7166 raise errors.OpPrereqError("Not enough disks to import."
7167 " (instance: %d, export: %d)" %
7168 (instance_disks, export_disks),
7172 for idx in range(export_disks):
7173 option = 'disk%d_dump' % idx
7174 if export_info.has_option(constants.INISECT_INS, option):
7175 # FIXME: are the old os-es, disk sizes, etc. useful?
7176 export_name = export_info.get(constants.INISECT_INS, option)
7177 image = utils.PathJoin(self.op.src_path, export_name)
7178 disk_images.append(image)
7180 disk_images.append(False)
7182 self.src_images = disk_images
7184 old_name = export_info.get(constants.INISECT_INS, 'name')
7186 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7187 except (TypeError, ValueError), err:
7188 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7189 " an integer: %s" % str(err),
7191 if self.op.instance_name == old_name:
7192 for idx, nic in enumerate(self.nics):
7193 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7194 nic_mac_ini = 'nic%d_mac' % idx
7195 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7197 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7199 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7200 if self.op.ip_check:
7201 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7202 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7203 (self.check_ip, self.op.instance_name),
7204 errors.ECODE_NOTUNIQUE)
7206 #### mac address generation
7207 # By generating the mac address here, both the allocator and the hooks get
7208 # the real final mac address rather than the 'auto' or 'generate' value.
7209 # There is a race condition between the generation and the instance object
7210 # creation, which means that we know the mac is valid now, but we're not
7211 # sure it will be when we actually add the instance. If things go bad
7212 # adding the instance will abort because of a duplicate mac, and the
7213 # creation job will fail.
7214 for nic in self.nics:
7215 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7216 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7220 if self.op.iallocator is not None:
7221 self._RunAllocator()
7223 #### node related checks
7225 # check primary node
7226 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7227 assert self.pnode is not None, \
7228 "Cannot retrieve locked node %s" % self.op.pnode
7230 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7231 pnode.name, errors.ECODE_STATE)
7233 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7234 pnode.name, errors.ECODE_STATE)
7236 self.secondaries = []
7238 # mirror node verification
7239 if self.op.disk_template in constants.DTS_NET_MIRROR:
7240 if self.op.snode == pnode.name:
7241 raise errors.OpPrereqError("The secondary node cannot be the"
7242 " primary node.", errors.ECODE_INVAL)
7243 _CheckNodeOnline(self, self.op.snode)
7244 _CheckNodeNotDrained(self, self.op.snode)
7245 self.secondaries.append(self.op.snode)
7247 nodenames = [pnode.name] + self.secondaries
7249 req_size = _ComputeDiskSize(self.op.disk_template,
7252 # Check lv size requirements, if not adopting
7253 if req_size is not None and not self.adopt_disks:
7254 _CheckNodesFreeDisk(self, nodenames, req_size)
7256 if self.adopt_disks: # instead, we must check the adoption data
7257 all_lvs = set([i["adopt"] for i in self.disks])
7258 if len(all_lvs) != len(self.disks):
7259 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7261 for lv_name in all_lvs:
7263 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7264 except errors.ReservationError:
7265 raise errors.OpPrereqError("LV named %s used by another instance" %
7266 lv_name, errors.ECODE_NOTUNIQUE)
7268 node_lvs = self.rpc.call_lv_list([pnode.name],
7269 self.cfg.GetVGName())[pnode.name]
7270 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7271 node_lvs = node_lvs.payload
7272 delta = all_lvs.difference(node_lvs.keys())
7274 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7275 utils.CommaJoin(delta),
7277 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7279 raise errors.OpPrereqError("Online logical volumes found, cannot"
7280 " adopt: %s" % utils.CommaJoin(online_lvs),
7282 # update the size of each disk based on what is found
7283 for dsk in self.disks:
7284 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7286 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7288 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7289 # check OS parameters (remotely)
7290 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7292 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7294 # memory check on primary node
7296 _CheckNodeFreeMemory(self, self.pnode.name,
7297 "creating instance %s" % self.op.instance_name,
7298 self.be_full[constants.BE_MEMORY],
7301 self.dry_run_result = list(nodenames)
7303 def Exec(self, feedback_fn):
7304 """Create and add the instance to the cluster.
7307 instance = self.op.instance_name
7308 pnode_name = self.pnode.name
7310 ht_kind = self.op.hypervisor
7311 if ht_kind in constants.HTS_REQ_PORT:
7312 network_port = self.cfg.AllocatePort()
7316 if constants.ENABLE_FILE_STORAGE:
7317 # this is needed because os.path.join does not accept None arguments
7318 if self.op.file_storage_dir is None:
7319 string_file_storage_dir = ""
7321 string_file_storage_dir = self.op.file_storage_dir
7323 # build the full file storage dir path
7324 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7325 string_file_storage_dir, instance)
7327 file_storage_dir = ""
7329 disks = _GenerateDiskTemplate(self,
7330 self.op.disk_template,
7331 instance, pnode_name,
7335 self.op.file_driver,
7338 iobj = objects.Instance(name=instance, os=self.op.os_type,
7339 primary_node=pnode_name,
7340 nics=self.nics, disks=disks,
7341 disk_template=self.op.disk_template,
7343 network_port=network_port,
7344 beparams=self.op.beparams,
7345 hvparams=self.op.hvparams,
7346 hypervisor=self.op.hypervisor,
7347 osparams=self.op.osparams,
7350 if self.adopt_disks:
7351 # rename LVs to the newly-generated names; we need to construct
7352 # 'fake' LV disks with the old data, plus the new unique_id
7353 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7355 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7356 rename_to.append(t_dsk.logical_id)
7357 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7358 self.cfg.SetDiskID(t_dsk, pnode_name)
7359 result = self.rpc.call_blockdev_rename(pnode_name,
7360 zip(tmp_disks, rename_to))
7361 result.Raise("Failed to rename adopted LVs")
7363 feedback_fn("* creating instance disks...")
7365 _CreateDisks(self, iobj)
7366 except errors.OpExecError:
7367 self.LogWarning("Device creation failed, reverting...")
7369 _RemoveDisks(self, iobj)
7371 self.cfg.ReleaseDRBDMinors(instance)
7374 feedback_fn("adding instance %s to cluster config" % instance)
7376 self.cfg.AddInstance(iobj, self.proc.GetECId())
7378 # Declare that we don't want to remove the instance lock anymore, as we've
7379 # added the instance to the config
7380 del self.remove_locks[locking.LEVEL_INSTANCE]
7381 # Unlock all the nodes
7382 if self.op.mode == constants.INSTANCE_IMPORT:
7383 nodes_keep = [self.op.src_node]
7384 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7385 if node != self.op.src_node]
7386 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7387 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7389 self.context.glm.release(locking.LEVEL_NODE)
7390 del self.acquired_locks[locking.LEVEL_NODE]
7392 if self.op.wait_for_sync:
7393 disk_abort = not _WaitForSync(self, iobj)
7394 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7395 # make sure the disks are not degraded (still syncing is ok)
7397 feedback_fn("* checking mirrors status")
7398 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7403 _RemoveDisks(self, iobj)
7404 self.cfg.RemoveInstance(iobj.name)
7405 # Make sure the instance lock gets removed
7406 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7407 raise errors.OpExecError("There are some degraded disks for"
7410 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7411 if self.op.mode == constants.INSTANCE_CREATE:
7412 if not self.op.no_install:
7413 feedback_fn("* running the instance OS create scripts...")
7414 # FIXME: pass debug option from opcode to backend
7415 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7416 self.op.debug_level)
7417 result.Raise("Could not add os for instance %s"
7418 " on node %s" % (instance, pnode_name))
7420 elif self.op.mode == constants.INSTANCE_IMPORT:
7421 feedback_fn("* running the instance OS import scripts...")
7425 for idx, image in enumerate(self.src_images):
7429 # FIXME: pass debug option from opcode to backend
7430 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7431 constants.IEIO_FILE, (image, ),
7432 constants.IEIO_SCRIPT,
7433 (iobj.disks[idx], idx),
7435 transfers.append(dt)
7438 masterd.instance.TransferInstanceData(self, feedback_fn,
7439 self.op.src_node, pnode_name,
7440 self.pnode.secondary_ip,
7442 if not compat.all(import_result):
7443 self.LogWarning("Some disks for instance %s on node %s were not"
7444 " imported successfully" % (instance, pnode_name))
7446 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7447 feedback_fn("* preparing remote import...")
7448 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7449 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7451 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7452 self.source_x509_ca,
7453 self._cds, timeouts)
7454 if not compat.all(disk_results):
7455 # TODO: Should the instance still be started, even if some disks
7456 # failed to import (valid for local imports, too)?
7457 self.LogWarning("Some disks for instance %s on node %s were not"
7458 " imported successfully" % (instance, pnode_name))
7460 # Run rename script on newly imported instance
7461 assert iobj.name == instance
7462 feedback_fn("Running rename script for %s" % instance)
7463 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7464 self.source_instance_name,
7465 self.op.debug_level)
7467 self.LogWarning("Failed to run rename script for %s on node"
7468 " %s: %s" % (instance, pnode_name, result.fail_msg))
7471 # also checked in the prereq part
7472 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7476 iobj.admin_up = True
7477 self.cfg.Update(iobj, feedback_fn)
7478 logging.info("Starting instance %s on node %s", instance, pnode_name)
7479 feedback_fn("* starting instance...")
7480 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7481 result.Raise("Could not start instance")
7483 return list(iobj.all_nodes)
7486 class LUConnectConsole(NoHooksLU):
7487 """Connect to an instance's console.
7489 This is somewhat special in that it returns the command line that
7490 you need to run on the master node in order to connect to the
7499 def ExpandNames(self):
7500 self._ExpandAndLockInstance()
7502 def CheckPrereq(self):
7503 """Check prerequisites.
7505 This checks that the instance is in the cluster.
7508 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7509 assert self.instance is not None, \
7510 "Cannot retrieve locked instance %s" % self.op.instance_name
7511 _CheckNodeOnline(self, self.instance.primary_node)
7513 def Exec(self, feedback_fn):
7514 """Connect to the console of an instance
7517 instance = self.instance
7518 node = instance.primary_node
7520 node_insts = self.rpc.call_instance_list([node],
7521 [instance.hypervisor])[node]
7522 node_insts.Raise("Can't get node information from %s" % node)
7524 if instance.name not in node_insts.payload:
7525 raise errors.OpExecError("Instance %s is not running." % instance.name)
7527 logging.debug("Connecting to console of %s on %s", instance.name, node)
7529 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7530 cluster = self.cfg.GetClusterInfo()
7531 # beparams and hvparams are passed separately, to avoid editing the
7532 # instance and then saving the defaults in the instance itself.
7533 hvparams = cluster.FillHV(instance)
7534 beparams = cluster.FillBE(instance)
7535 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7538 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7541 class LUReplaceDisks(LogicalUnit):
7542 """Replace the disks of an instance.
7545 HPATH = "mirrors-replace"
7546 HTYPE = constants.HTYPE_INSTANCE
7549 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7550 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7551 ("remote_node", None, _TMaybeString),
7552 ("iallocator", None, _TMaybeString),
7553 ("early_release", False, _TBool),
7557 def CheckArguments(self):
7558 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7561 def ExpandNames(self):
7562 self._ExpandAndLockInstance()
7564 if self.op.iallocator is not None:
7565 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7567 elif self.op.remote_node is not None:
7568 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7569 self.op.remote_node = remote_node
7571 # Warning: do not remove the locking of the new secondary here
7572 # unless DRBD8.AddChildren is changed to work in parallel;
7573 # currently it doesn't since parallel invocations of
7574 # FindUnusedMinor will conflict
7575 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7576 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7579 self.needed_locks[locking.LEVEL_NODE] = []
7580 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7582 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7583 self.op.iallocator, self.op.remote_node,
7584 self.op.disks, False, self.op.early_release)
7586 self.tasklets = [self.replacer]
7588 def DeclareLocks(self, level):
7589 # If we're not already locking all nodes in the set we have to declare the
7590 # instance's primary/secondary nodes.
7591 if (level == locking.LEVEL_NODE and
7592 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7593 self._LockInstancesNodes()
7595 def BuildHooksEnv(self):
7598 This runs on the master, the primary and all the secondaries.
7601 instance = self.replacer.instance
7603 "MODE": self.op.mode,
7604 "NEW_SECONDARY": self.op.remote_node,
7605 "OLD_SECONDARY": instance.secondary_nodes[0],
7607 env.update(_BuildInstanceHookEnvByObject(self, instance))
7609 self.cfg.GetMasterNode(),
7610 instance.primary_node,
7612 if self.op.remote_node is not None:
7613 nl.append(self.op.remote_node)
7617 class TLReplaceDisks(Tasklet):
7618 """Replaces disks for an instance.
7620 Note: Locking is not within the scope of this class.
7623 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7624 disks, delay_iallocator, early_release):
7625 """Initializes this class.
7628 Tasklet.__init__(self, lu)
7631 self.instance_name = instance_name
7633 self.iallocator_name = iallocator_name
7634 self.remote_node = remote_node
7636 self.delay_iallocator = delay_iallocator
7637 self.early_release = early_release
7640 self.instance = None
7641 self.new_node = None
7642 self.target_node = None
7643 self.other_node = None
7644 self.remote_node_info = None
7645 self.node_secondary_ip = None
7648 def CheckArguments(mode, remote_node, iallocator):
7649 """Helper function for users of this class.
7652 # check for valid parameter combination
7653 if mode == constants.REPLACE_DISK_CHG:
7654 if remote_node is None and iallocator is None:
7655 raise errors.OpPrereqError("When changing the secondary either an"
7656 " iallocator script must be used or the"
7657 " new node given", errors.ECODE_INVAL)
7659 if remote_node is not None and iallocator is not None:
7660 raise errors.OpPrereqError("Give either the iallocator or the new"
7661 " secondary, not both", errors.ECODE_INVAL)
7663 elif remote_node is not None or iallocator is not None:
7664 # Not replacing the secondary
7665 raise errors.OpPrereqError("The iallocator and new node options can"
7666 " only be used when changing the"
7667 " secondary node", errors.ECODE_INVAL)
7670 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7671 """Compute a new secondary node using an IAllocator.
7674 ial = IAllocator(lu.cfg, lu.rpc,
7675 mode=constants.IALLOCATOR_MODE_RELOC,
7677 relocate_from=relocate_from)
7679 ial.Run(iallocator_name)
7682 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7683 " %s" % (iallocator_name, ial.info),
7686 if len(ial.result) != ial.required_nodes:
7687 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7688 " of nodes (%s), required %s" %
7690 len(ial.result), ial.required_nodes),
7693 remote_node_name = ial.result[0]
7695 lu.LogInfo("Selected new secondary for instance '%s': %s",
7696 instance_name, remote_node_name)
7698 return remote_node_name
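# Hypothetical usage of the helper above: pick a new secondary with the
# "hail" allocator for an instance whose current secondary is node2
# (all names here are illustrative):
#   new_secondary = TLReplaceDisks._RunAllocator(lu, "hail",
#                                                "inst1.example.com",
#                                                ["node2.example.com"])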
7700 def _FindFaultyDisks(self, node_name):
7701 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7704 def CheckPrereq(self):
7705 """Check prerequisites.
7707 This checks that the instance is in the cluster.
7710 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7711 assert instance is not None, \
7712 "Cannot retrieve locked instance %s" % self.instance_name
7714 if instance.disk_template != constants.DT_DRBD8:
7715 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7716 " instances", errors.ECODE_INVAL)
7718 if len(instance.secondary_nodes) != 1:
7719 raise errors.OpPrereqError("The instance has a strange layout,"
7720 " expected one secondary but found %d" %
7721 len(instance.secondary_nodes),
7724 if not self.delay_iallocator:
7725 self._CheckPrereq2()
7727 def _CheckPrereq2(self):
7728 """Check prerequisites, second part.
7730 This function should always be part of CheckPrereq. It was separated and is
7731 now called from Exec because during node evacuation iallocator was only
7732 called with an unmodified cluster model, not taking planned changes into account.
7736 instance = self.instance
7737 secondary_node = instance.secondary_nodes[0]
7739 if self.iallocator_name is None:
7740 remote_node = self.remote_node
7742 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7743 instance.name, instance.secondary_nodes)
7745 if remote_node is not None:
7746 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7747 assert self.remote_node_info is not None, \
7748 "Cannot retrieve locked node %s" % remote_node
7750 self.remote_node_info = None
7752 if remote_node == self.instance.primary_node:
7753 raise errors.OpPrereqError("The specified node is the primary node of"
7754 " the instance.", errors.ECODE_INVAL)
7756 if remote_node == secondary_node:
7757 raise errors.OpPrereqError("The specified node is already the"
7758 " secondary node of the instance.",
7761 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7762 constants.REPLACE_DISK_CHG):
7763 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7766 if self.mode == constants.REPLACE_DISK_AUTO:
7767 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7768 faulty_secondary = self._FindFaultyDisks(secondary_node)
7770 if faulty_primary and faulty_secondary:
7771 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7772 " one node and can not be repaired"
7773 " automatically" % self.instance_name,
7777 self.disks = faulty_primary
7778 self.target_node = instance.primary_node
7779 self.other_node = secondary_node
7780 check_nodes = [self.target_node, self.other_node]
7781 elif faulty_secondary:
7782 self.disks = faulty_secondary
7783 self.target_node = secondary_node
7784 self.other_node = instance.primary_node
7785 check_nodes = [self.target_node, self.other_node]
7791 # Non-automatic modes
7792 if self.mode == constants.REPLACE_DISK_PRI:
7793 self.target_node = instance.primary_node
7794 self.other_node = secondary_node
7795 check_nodes = [self.target_node, self.other_node]
7797 elif self.mode == constants.REPLACE_DISK_SEC:
7798 self.target_node = secondary_node
7799 self.other_node = instance.primary_node
7800 check_nodes = [self.target_node, self.other_node]
7802 elif self.mode == constants.REPLACE_DISK_CHG:
7803 self.new_node = remote_node
7804 self.other_node = instance.primary_node
7805 self.target_node = secondary_node
7806 check_nodes = [self.new_node, self.other_node]
7808 _CheckNodeNotDrained(self.lu, remote_node)
7810 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7811 assert old_node_info is not None
7812 if old_node_info.offline and not self.early_release:
7813 # doesn't make sense to delay the release
7814 self.early_release = True
7815 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7816 " early-release mode", secondary_node)
7819 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7822 # If not specified all disks should be replaced
7824 self.disks = range(len(self.instance.disks))
7826 for node in check_nodes:
7827 _CheckNodeOnline(self.lu, node)
7829 # Check whether disks are valid
7830 for disk_idx in self.disks:
7831 instance.FindDisk(disk_idx)
7833 # Get secondary node IP addresses
7836 for node_name in [self.target_node, self.other_node, self.new_node]:
7837 if node_name is not None:
7838 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7840 self.node_secondary_ip = node_2nd_ip
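# Illustrative shape of the mapping built above (node names and addresses
# are assumptions):
#   self.node_secondary_ip == {
#     "node1.example.com": "192.0.2.1",   # target_node
#     "node2.example.com": "192.0.2.2",   # other_node
#   }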
7842 def Exec(self, feedback_fn):
7843 """Execute disk replacement.
7845 This dispatches the disk replacement to the appropriate handler.
7848 if self.delay_iallocator:
7849 self._CheckPrereq2()
7852 feedback_fn("No disks need replacement")
7855 feedback_fn("Replacing disk(s) %s for %s" %
7856 (utils.CommaJoin(self.disks), self.instance.name))
7858 activate_disks = (not self.instance.admin_up)
7860 # Activate the instance disks if we're replacing them on a down instance
7862 _StartInstanceDisks(self.lu, self.instance, True)
7865 # Should we replace the secondary node?
7866 if self.new_node is not None:
7867 fn = self._ExecDrbd8Secondary
7869 fn = self._ExecDrbd8DiskOnly
7871 return fn(feedback_fn)
7874 # Deactivate the instance disks if we're replacing them on a
7877 _SafeShutdownInstanceDisks(self.lu, self.instance)
7879 def _CheckVolumeGroup(self, nodes):
7880 self.lu.LogInfo("Checking volume groups")
7882 vgname = self.cfg.GetVGName()
7884 # Make sure volume group exists on all involved nodes
7885 results = self.rpc.call_vg_list(nodes)
7887 raise errors.OpExecError("Can't list volume groups on the nodes")
7891 res.Raise("Error checking node %s" % node)
7892 if vgname not in res.payload:
7893 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7896 def _CheckDisksExistence(self, nodes):
7897 # Check disk existence
7898 for idx, dev in enumerate(self.instance.disks):
7899 if idx not in self.disks:
7903 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7904 self.cfg.SetDiskID(dev, node)
7906 result = self.rpc.call_blockdev_find(node, dev)
7908 msg = result.fail_msg
7909 if msg or not result.payload:
7911 msg = "disk not found"
7912 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7915 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7916 for idx, dev in enumerate(self.instance.disks):
7917 if idx not in self.disks:
7920 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7923 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7925 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7926 " replace disks for instance %s" %
7927 (node_name, self.instance.name))
7929 def _CreateNewStorage(self, node_name):
7930 vgname = self.cfg.GetVGName()
7933 for idx, dev in enumerate(self.instance.disks):
7934 if idx not in self.disks:
7937 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7939 self.cfg.SetDiskID(dev, node_name)
7941 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7942 names = _GenerateUniqueNames(self.lu, lv_names)
7944 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7945 logical_id=(vgname, names[0]))
7946 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7947 logical_id=(vgname, names[1]))
7949 new_lvs = [lv_data, lv_meta]
7950 old_lvs = dev.children
7951 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7953 # we pass force_create=True to force the LVM creation
7954 for new_lv in new_lvs:
7955 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7956 _GetInstanceInfoText(self.instance), False)
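# Illustrative shape of the iv_names mapping built by _CreateNewStorage
# (the Disk objects and names are placeholders; real names come from
# _GenerateUniqueNames):
#   iv_names == {
#     "disk/0": (<drbd8 Disk>, [<old data LV>, <old meta LV>],
#                [<new data LV>, <new meta LV>]),
#   }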
7960 def _CheckDevices(self, node_name, iv_names):
7961 for name, (dev, _, _) in iv_names.iteritems():
7962 self.cfg.SetDiskID(dev, node_name)
7964 result = self.rpc.call_blockdev_find(node_name, dev)
7966 msg = result.fail_msg
7967 if msg or not result.payload:
7969 msg = "disk not found"
7970 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7973 if result.payload.is_degraded:
7974 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7976 def _RemoveOldStorage(self, node_name, iv_names):
7977 for name, (_, old_lvs, _) in iv_names.iteritems():
7978 self.lu.LogInfo("Remove logical volumes for %s" % name)
7981 self.cfg.SetDiskID(lv, node_name)
7983 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7985 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7986 hint="remove unused LVs manually")
7988 def _ReleaseNodeLock(self, node_name):
7989 """Releases the lock for a given node."""
7990 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7992 def _ExecDrbd8DiskOnly(self, feedback_fn):
7993 """Replace a disk on the primary or secondary for DRBD 8.
7995 The algorithm for replace is quite complicated:
7997 1. for each disk to be replaced:
7999 1. create new LVs on the target node with unique names
8000 1. detach old LVs from the drbd device
8001 1. rename old LVs to name_replaced.<time_t>
8002 1. rename new LVs to old LVs
8003 1. attach the new LVs (with the old names now) to the drbd device
8005 1. wait for sync across all devices
8007 1. for each modified disk:
8009 1. remove old LVs (which have the name name_replaced.<time_t>)
8011 Failures are not very well handled.
8016 # Step: check device activation
8017 self.lu.LogStep(1, steps_total, "Check device existence")
8018 self._CheckDisksExistence([self.other_node, self.target_node])
8019 self._CheckVolumeGroup([self.target_node, self.other_node])
8021 # Step: check other node consistency
8022 self.lu.LogStep(2, steps_total, "Check peer consistency")
8023 self._CheckDisksConsistency(self.other_node,
8024 self.other_node == self.instance.primary_node,
8027 # Step: create new storage
8028 self.lu.LogStep(3, steps_total, "Allocate new storage")
8029 iv_names = self._CreateNewStorage(self.target_node)
8031 # Step: for each lv, detach+rename*2+attach
8032 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8033 for dev, old_lvs, new_lvs in iv_names.itervalues():
8034 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8036 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8038 result.Raise("Can't detach drbd from local storage on node"
8039 " %s for device %s" % (self.target_node, dev.iv_name))
8041 #cfg.Update(instance)
8043 # ok, we created the new LVs, so now we know we have the needed
8044 # storage; as such, we proceed on the target node to rename
8045 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8046 # using the assumption that logical_id == physical_id (which in
8047 # turn is the unique_id on that node)
8049 # FIXME(iustin): use a better name for the replaced LVs
8050 temp_suffix = int(time.time())
8051 ren_fn = lambda d, suff: (d.physical_id[0],
8052 d.physical_id[1] + "_replaced-%s" % suff)
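# To make the dance below concrete, with hypothetical names and
# temp_suffix == 1300000000, one old data LV goes through:
#   xenvg/abc.disk0_data -> xenvg/abc.disk0_data_replaced-1300000000
# while the freshly created LV is renamed to take over the old name:
#   xenvg/def.disk0_data -> xenvg/abc.disk0_data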
8054 # Build the rename list based on what LVs exist on the node
8055 rename_old_to_new = []
8056 for to_ren in old_lvs:
8057 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8058 if not result.fail_msg and result.payload:
8060 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8062 self.lu.LogInfo("Renaming the old LVs on the target node")
8063 result = self.rpc.call_blockdev_rename(self.target_node,
8065 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8067 # Now we rename the new LVs to the old LVs
8068 self.lu.LogInfo("Renaming the new LVs on the target node")
8069 rename_new_to_old = [(new, old.physical_id)
8070 for old, new in zip(old_lvs, new_lvs)]
8071 result = self.rpc.call_blockdev_rename(self.target_node,
8073 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8075 for old, new in zip(old_lvs, new_lvs):
8076 new.logical_id = old.logical_id
8077 self.cfg.SetDiskID(new, self.target_node)
8079 for disk in old_lvs:
8080 disk.logical_id = ren_fn(disk, temp_suffix)
8081 self.cfg.SetDiskID(disk, self.target_node)
8083 # Now that the new lvs have the old name, we can add them to the device
8084 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8085 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8087 msg = result.fail_msg
8089 for new_lv in new_lvs:
8090 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8093 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8094 hint=("cleanup manually the unused logical"
8096 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8098 dev.children = new_lvs
8100 self.cfg.Update(self.instance, feedback_fn)
8103 if self.early_release:
8104 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8106 self._RemoveOldStorage(self.target_node, iv_names)
8107 # WARNING: we release both node locks here, do not do other RPCs
8108 # than WaitForSync to the primary node
8109 self._ReleaseNodeLock([self.target_node, self.other_node])
8112 # This can fail as the old devices are degraded and _WaitForSync
8113 # does a combined result over all disks, so we don't check its return value
8114 self.lu.LogStep(cstep, steps_total, "Sync devices")
8116 _WaitForSync(self.lu, self.instance)
8118 # Check all devices manually
8119 self._CheckDevices(self.instance.primary_node, iv_names)
8121 # Step: remove old storage
8122 if not self.early_release:
8123 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8125 self._RemoveOldStorage(self.target_node, iv_names)
8127 def _ExecDrbd8Secondary(self, feedback_fn):
8128 """Replace the secondary node for DRBD 8.
8130 The algorithm for replace is quite complicated:
8131 - for all disks of the instance:
8132 - create new LVs on the new node with same names
8133 - shutdown the drbd device on the old secondary
8134 - disconnect the drbd network on the primary
8135 - create the drbd device on the new secondary
8136 - network attach the drbd on the primary, using an artifice:
8137 the drbd code for Attach() will connect to the network if it
8138 finds a device which is connected to the good local disks but not network enabled
8140 - wait for sync across all devices
8141 - remove all disks from the old secondary
8143 Failures are not very well handled.
8148 # Step: check device activation
8149 self.lu.LogStep(1, steps_total, "Check device existence")
8150 self._CheckDisksExistence([self.instance.primary_node])
8151 self._CheckVolumeGroup([self.instance.primary_node])
8153 # Step: check other node consistency
8154 self.lu.LogStep(2, steps_total, "Check peer consistency")
8155 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8157 # Step: create new storage
8158 self.lu.LogStep(3, steps_total, "Allocate new storage")
8159 for idx, dev in enumerate(self.instance.disks):
8160 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8161 (self.new_node, idx))
8162 # we pass force_create=True to force LVM creation
8163 for new_lv in dev.children:
8164 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8165 _GetInstanceInfoText(self.instance), False)
8167 # Step 4: drbd minors and drbd setup changes
8168 # after this, we must manually remove the drbd minors on both the
8169 # error and the success paths
8170 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8171 minors = self.cfg.AllocateDRBDMinor([self.new_node
8172 for dev in self.instance.disks],
8174 logging.debug("Allocated minors %r", minors)
8177 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8178 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8179 (self.new_node, idx))
8180 # create new devices on new_node; note that we create two IDs:
8181 # one without port, so the drbd will be activated without
8182 # networking information on the new node at this stage, and one
8183 # with network, for the latter activation in step 4
8184 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8185 if self.instance.primary_node == o_node1:
8188 assert self.instance.primary_node == o_node2, "Three-node instance?"
8191 new_alone_id = (self.instance.primary_node, self.new_node, None,
8192 p_minor, new_minor, o_secret)
8193 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8194 p_minor, new_minor, o_secret)
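# Illustrative values for the two logical_id tuples built above (all made
# up): the "alone" variant carries no network port, the "net" variant does.
#   new_alone_id == ("node1.example.com", "node3.example.com", None, 0, 5, "secret")
#   new_net_id   == ("node1.example.com", "node3.example.com", 11000, 0, 5, "secret")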
8196 iv_names[idx] = (dev, dev.children, new_net_id)
8197 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8199 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8200 logical_id=new_alone_id,
8201 children=dev.children,
8204 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8205 _GetInstanceInfoText(self.instance), False)
8206 except errors.GenericError:
8207 self.cfg.ReleaseDRBDMinors(self.instance.name)
8210 # We have new devices, shutdown the drbd on the old secondary
8211 for idx, dev in enumerate(self.instance.disks):
8212 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8213 self.cfg.SetDiskID(dev, self.target_node)
8214 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8216 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8217 "node: %s" % (idx, msg),
8218 hint=("Please cleanup this device manually as"
8219 " soon as possible"))
8221 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8222 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8223 self.node_secondary_ip,
8224 self.instance.disks)\
8225 [self.instance.primary_node]
8227 msg = result.fail_msg
8229 # detaches didn't succeed (unlikely)
8230 self.cfg.ReleaseDRBDMinors(self.instance.name)
8231 raise errors.OpExecError("Can't detach the disks from the network on"
8232 " old node: %s" % (msg,))
8234 # if we managed to detach at least one, we update all the disks of
8235 # the instance to point to the new secondary
8236 self.lu.LogInfo("Updating instance configuration")
8237 for dev, _, new_logical_id in iv_names.itervalues():
8238 dev.logical_id = new_logical_id
8239 self.cfg.SetDiskID(dev, self.instance.primary_node)
8241 self.cfg.Update(self.instance, feedback_fn)
8243 # and now perform the drbd attach
8244 self.lu.LogInfo("Attaching primary drbds to new secondary"
8245 " (standalone => connected)")
8246 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8248 self.node_secondary_ip,
8249 self.instance.disks,
8252 for to_node, to_result in result.items():
8253 msg = to_result.fail_msg
8255 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8257 hint=("please do a gnt-instance info to see the"
8258 " status of disks"))
8260 if self.early_release:
8261 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8263 self._RemoveOldStorage(self.target_node, iv_names)
8264 # WARNING: we release all node locks here, do not do other RPCs
8265 # than WaitForSync to the primary node
8266 self._ReleaseNodeLock([self.instance.primary_node,
8271 # This can fail as the old devices are degraded and _WaitForSync
8272 # does a combined result over all disks, so we don't check its return value
8273 self.lu.LogStep(cstep, steps_total, "Sync devices")
8275 _WaitForSync(self.lu, self.instance)
8277 # Check all devices manually
8278 self._CheckDevices(self.instance.primary_node, iv_names)
8280 # Step: remove old storage
8281 if not self.early_release:
8282 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8283 self._RemoveOldStorage(self.target_node, iv_names)
8286 class LURepairNodeStorage(NoHooksLU):
8287 """Repairs the volume group on a node.
8292 ("storage_type", _NoDefault, _CheckStorageType),
8293 ("name", _NoDefault, _TNonEmptyString),
8294 ("ignore_consistency", False, _TBool),
8298 def CheckArguments(self):
8299 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8301 storage_type = self.op.storage_type
8303 if (constants.SO_FIX_CONSISTENCY not in
8304 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8305 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8306 " repaired" % storage_type,
8309 def ExpandNames(self):
8310 self.needed_locks = {
8311 locking.LEVEL_NODE: [self.op.node_name],
8314 def _CheckFaultyDisks(self, instance, node_name):
8315 """Ensure faulty disks abort the opcode or at least warn."""
8317 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8319 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8320 " node '%s'" % (instance.name, node_name),
8322 except errors.OpPrereqError, err:
8323 if self.op.ignore_consistency:
8324 self.proc.LogWarning(str(err.args[0]))
8328 def CheckPrereq(self):
8329 """Check prerequisites.
8332 # Check whether any instance on this node has faulty disks
8333 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8334 if not inst.admin_up:
8336 check_nodes = set(inst.all_nodes)
8337 check_nodes.discard(self.op.node_name)
8338 for inst_node_name in check_nodes:
8339 self._CheckFaultyDisks(inst, inst_node_name)
8341 def Exec(self, feedback_fn):
8342 feedback_fn("Repairing storage unit '%s' on %s ..." %
8343 (self.op.name, self.op.node_name))
8345 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8346 result = self.rpc.call_storage_execute(self.op.node_name,
8347 self.op.storage_type, st_args,
8349 constants.SO_FIX_CONSISTENCY)
8350 result.Raise("Failed to repair storage unit '%s' on %s" %
8351 (self.op.name, self.op.node_name))
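# For reference, this LU is usually reached through something like
# (a sketch; exact CLI syntax may vary between versions):
#   gnt-node repair-storage node1.example.com lvm-vg xenvg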
8354 class LUNodeEvacuationStrategy(NoHooksLU):
8355 """Computes the node evacuation strategy.
8359 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8360 ("remote_node", None, _TMaybeString),
8361 ("iallocator", None, _TMaybeString),
8365 def CheckArguments(self):
8366 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8368 def ExpandNames(self):
8369 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8370 self.needed_locks = locks = {}
8371 if self.op.remote_node is None:
8372 locks[locking.LEVEL_NODE] = locking.ALL_SET
8374 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8375 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8377 def Exec(self, feedback_fn):
8378 if self.op.remote_node is not None:
8380 for node in self.op.nodes:
8381 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8384 if i.primary_node == self.op.remote_node:
8385 raise errors.OpPrereqError("Node %s is the primary node of"
8386 " instance %s, cannot use it as"
8388 (self.op.remote_node, i.name),
8390 result.append([i.name, self.op.remote_node])
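# e.g. result == [["inst1.example.com", "node4.example.com"], ...]
# (instance/target-node pairs; names are illustrative)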
8392 ial = IAllocator(self.cfg, self.rpc,
8393 mode=constants.IALLOCATOR_MODE_MEVAC,
8394 evac_nodes=self.op.nodes)
8395 ial.Run(self.op.iallocator, validate=True)
8397 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8403 class LUGrowDisk(LogicalUnit):
8404 """Grow a disk of an instance.
8408 HTYPE = constants.HTYPE_INSTANCE
8411 ("disk", _NoDefault, _TInt),
8412 ("amount", _NoDefault, _TInt),
8413 ("wait_for_sync", True, _TBool),
8417 def ExpandNames(self):
8418 self._ExpandAndLockInstance()
8419 self.needed_locks[locking.LEVEL_NODE] = []
8420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8422 def DeclareLocks(self, level):
8423 if level == locking.LEVEL_NODE:
8424 self._LockInstancesNodes()
8426 def BuildHooksEnv(self):
8429 This runs on the master, the primary and all the secondaries.
8433 "DISK": self.op.disk,
8434 "AMOUNT": self.op.amount,
8436 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8437 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8440 def CheckPrereq(self):
8441 """Check prerequisites.
8443 This checks that the instance is in the cluster.
8446 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8447 assert instance is not None, \
8448 "Cannot retrieve locked instance %s" % self.op.instance_name
8449 nodenames = list(instance.all_nodes)
8450 for node in nodenames:
8451 _CheckNodeOnline(self, node)
8453 self.instance = instance
8455 if instance.disk_template not in constants.DTS_GROWABLE:
8456 raise errors.OpPrereqError("Instance's disk layout does not support"
8457 " growing.", errors.ECODE_INVAL)
8459 self.disk = instance.FindDisk(self.op.disk)
8461 if instance.disk_template != constants.DT_FILE:
8462 # TODO: check the free disk space for file, when that feature will be
8464 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8466 def Exec(self, feedback_fn):
8467 """Execute disk grow.
8470 instance = self.instance
8473 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8475 raise errors.OpExecError("Cannot activate block device to grow")
8477 for node in instance.all_nodes:
8478 self.cfg.SetDiskID(disk, node)
8479 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8480 result.Raise("Grow request failed to node %s" % node)
8482 # TODO: Rewrite code to work properly
8483 # DRBD goes into sync mode for a short amount of time after executing the
8484 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8485 # calling "resize" in sync mode fails. Sleeping for a short amount of
8486 # time is a work-around.
8489 disk.RecordGrow(self.op.amount)
8490 self.cfg.Update(instance, feedback_fn)
8491 if self.op.wait_for_sync:
8492 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8494 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8495 " status.\nPlease check the instance.")
8496 if not instance.admin_up:
8497 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8498 elif not instance.admin_up:
8499 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8500 " not supposed to be running because no wait for"
8501 " sync mode was requested.")
8504 class LUQueryInstanceData(NoHooksLU):
8505 """Query runtime instance data.
8509 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8510 ("static", False, _TBool),
8514 def ExpandNames(self):
8515 self.needed_locks = {}
8516 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8518 if self.op.instances:
8519 self.wanted_names = []
8520 for name in self.op.instances:
8521 full_name = _ExpandInstanceName(self.cfg, name)
8522 self.wanted_names.append(full_name)
8523 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8525 self.wanted_names = None
8526 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8528 self.needed_locks[locking.LEVEL_NODE] = []
8529 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8531 def DeclareLocks(self, level):
8532 if level == locking.LEVEL_NODE:
8533 self._LockInstancesNodes()
8535 def CheckPrereq(self):
8536 """Check prerequisites.
8538 This only checks the optional instance list against the existing names.
8541 if self.wanted_names is None:
8542 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8544 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8545 in self.wanted_names]
8547 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8548 """Returns the status of a block device
8551 if self.op.static or not node:
8554 self.cfg.SetDiskID(dev, node)
8556 result = self.rpc.call_blockdev_find(node, dev)
8560 result.Raise("Can't compute disk status for %s" % instance_name)
8562 status = result.payload
8566 return (status.dev_path, status.major, status.minor,
8567 status.sync_percent, status.estimated_time,
8568 status.is_degraded, status.ldisk_status)
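# Illustrative return value for a healthy DRBD device (numbers made up):
#   ("/dev/drbd0", 147, 0, 100.0, None, False, constants.LDS_OKAY)
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
#       ldisk_status)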
8570 def _ComputeDiskStatus(self, instance, snode, dev):
8571 """Compute block device status.
8574 if dev.dev_type in constants.LDS_DRBD:
8575 # we change the snode then (otherwise we use the one passed in)
8576 if dev.logical_id[0] == instance.primary_node:
8577 snode = dev.logical_id[1]
8579 snode = dev.logical_id[0]
8581 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8583 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8586 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8587 for child in dev.children]
8592 "iv_name": dev.iv_name,
8593 "dev_type": dev.dev_type,
8594 "logical_id": dev.logical_id,
8595 "physical_id": dev.physical_id,
8596 "pstatus": dev_pstatus,
8597 "sstatus": dev_sstatus,
8598 "children": dev_children,
8605 def Exec(self, feedback_fn):
8606 """Gather and return data"""
8609 cluster = self.cfg.GetClusterInfo()
8611 for instance in self.wanted_instances:
8612 if not self.op.static:
8613 remote_info = self.rpc.call_instance_info(instance.primary_node,
8615 instance.hypervisor)
8616 remote_info.Raise("Error checking node %s" % instance.primary_node)
8617 remote_info = remote_info.payload
8618 if remote_info and "state" in remote_info:
8621 remote_state = "down"
8624 if instance.admin_up:
8627 config_state = "down"
8629 disks = [self._ComputeDiskStatus(instance, None, device)
8630 for device in instance.disks]
8633 "name": instance.name,
8634 "config_state": config_state,
8635 "run_state": remote_state,
8636 "pnode": instance.primary_node,
8637 "snodes": instance.secondary_nodes,
8639 # this happens to be the same format used for hooks
8640 "nics": _NICListToTuple(self, instance.nics),
8641 "disk_template": instance.disk_template,
8643 "hypervisor": instance.hypervisor,
8644 "network_port": instance.network_port,
8645 "hv_instance": instance.hvparams,
8646 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8647 "be_instance": instance.beparams,
8648 "be_actual": cluster.FillBE(instance),
8649 "os_instance": instance.osparams,
8650 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8651 "serial_no": instance.serial_no,
8652 "mtime": instance.mtime,
8653 "ctime": instance.ctime,
8654 "uuid": instance.uuid,
8657 result[instance.name] = idict
8662 class LUSetInstanceParams(LogicalUnit):
8663 """Modifies an instances's parameters.
8666 HPATH = "instance-modify"
8667 HTYPE = constants.HTYPE_INSTANCE
8670 ("nics", _EmptyList, _TList),
8671 ("disks", _EmptyList, _TList),
8672 ("beparams", _EmptyDict, _TDict),
8673 ("hvparams", _EmptyDict, _TDict),
8674 ("disk_template", None, _TMaybeString),
8675 ("remote_node", None, _TMaybeString),
8676 ("os_name", None, _TMaybeString),
8677 ("force_variant", False, _TBool),
8678 ("osparams", None, _TOr(_TDict, _TNone)),
8683 def CheckArguments(self):
8684 if not (self.op.nics or self.op.disks or self.op.disk_template or
8685 self.op.hvparams or self.op.beparams or self.op.os_name):
8686 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8688 if self.op.hvparams:
8689 _CheckGlobalHvParams(self.op.hvparams)
8693 for disk_op, disk_dict in self.op.disks:
8694 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8695 if disk_op == constants.DDM_REMOVE:
8698 elif disk_op == constants.DDM_ADD:
8701 if not isinstance(disk_op, int):
8702 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8703 if not isinstance(disk_dict, dict):
8704 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8705 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8707 if disk_op == constants.DDM_ADD:
8708 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8709 if mode not in constants.DISK_ACCESS_SET:
8710 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8712 size = disk_dict.get('size', None)
8714 raise errors.OpPrereqError("Required disk parameter size missing",
8718 except (TypeError, ValueError), err:
8719 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8720 str(err), errors.ECODE_INVAL)
8721 disk_dict['size'] = size
8723 # modification of disk
8724 if 'size' in disk_dict:
8725 raise errors.OpPrereqError("Disk size change not possible, use"
8726 " grow-disk", errors.ECODE_INVAL)
8728 if disk_addremove > 1:
8729 raise errors.OpPrereqError("Only one disk add or remove operation"
8730 " supported at a time", errors.ECODE_INVAL)
8732 if self.op.disks and self.op.disk_template is not None:
8733 raise errors.OpPrereqError("Disk template conversion and other disk"
8734 " changes not supported at the same time",
8737 if self.op.disk_template:
8738 _CheckDiskTemplate(self.op.disk_template)
8739 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8740 self.op.remote_node is None):
8741 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8742 " one requires specifying a secondary node",
8747 for nic_op, nic_dict in self.op.nics:
8748 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8749 if nic_op == constants.DDM_REMOVE:
8752 elif nic_op == constants.DDM_ADD:
8755 if not isinstance(nic_op, int):
8756 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8757 if not isinstance(nic_dict, dict):
8758 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8759 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8761 # nic_dict should be a dict
8762 nic_ip = nic_dict.get('ip', None)
8763 if nic_ip is not None:
8764 if nic_ip.lower() == constants.VALUE_NONE:
8765 nic_dict['ip'] = None
8767 if not netutils.IsValidIP4(nic_ip):
8768 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8771 nic_bridge = nic_dict.get('bridge', None)
8772 nic_link = nic_dict.get('link', None)
8773 if nic_bridge and nic_link:
8774 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8775 " at the same time", errors.ECODE_INVAL)
8776 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8777 nic_dict['bridge'] = None
8778 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8779 nic_dict['link'] = None
8781 if nic_op == constants.DDM_ADD:
8782 nic_mac = nic_dict.get('mac', None)
8784 nic_dict['mac'] = constants.VALUE_AUTO
8786 if 'mac' in nic_dict:
8787 nic_mac = nic_dict['mac']
8788 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8789 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8791 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8792 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8793 " modifying an existing nic",
8796 if nic_addremove > 1:
8797 raise errors.OpPrereqError("Only one NIC add or remove operation"
8798 " supported at a time", errors.ECODE_INVAL)
8800 def ExpandNames(self):
8801 self._ExpandAndLockInstance()
8802 self.needed_locks[locking.LEVEL_NODE] = []
8803 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8805 def DeclareLocks(self, level):
8806 if level == locking.LEVEL_NODE:
8807 self._LockInstancesNodes()
8808 if self.op.disk_template and self.op.remote_node:
8809 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8810 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8812 def BuildHooksEnv(self):
8815 This runs on the master, primary and secondaries.
8819 if constants.BE_MEMORY in self.be_new:
8820 args['memory'] = self.be_new[constants.BE_MEMORY]
8821 if constants.BE_VCPUS in self.be_new:
8822 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8823 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8824 # information at all.
8827 nic_override = dict(self.op.nics)
8828 for idx, nic in enumerate(self.instance.nics):
8829 if idx in nic_override:
8830 this_nic_override = nic_override[idx]
8832 this_nic_override = {}
8833 if 'ip' in this_nic_override:
8834 ip = this_nic_override['ip']
8837 if 'mac' in this_nic_override:
8838 mac = this_nic_override['mac']
8841 if idx in self.nic_pnew:
8842 nicparams = self.nic_pnew[idx]
8844 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8845 mode = nicparams[constants.NIC_MODE]
8846 link = nicparams[constants.NIC_LINK]
8847 args['nics'].append((ip, mac, mode, link))
8848 if constants.DDM_ADD in nic_override:
8849 ip = nic_override[constants.DDM_ADD].get('ip', None)
8850 mac = nic_override[constants.DDM_ADD]['mac']
8851 nicparams = self.nic_pnew[constants.DDM_ADD]
8852 mode = nicparams[constants.NIC_MODE]
8853 link = nicparams[constants.NIC_LINK]
8854 args['nics'].append((ip, mac, mode, link))
8855 elif constants.DDM_REMOVE in nic_override:
8856 del args['nics'][-1]
8858 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8859 if self.op.disk_template:
8860 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8861 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8864 def CheckPrereq(self):
8865 """Check prerequisites.
8867 This only checks the instance list against the existing names.
8870 # checking the new params on the primary/secondary nodes
8872 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8873 cluster = self.cluster = self.cfg.GetClusterInfo()
8874 assert self.instance is not None, \
8875 "Cannot retrieve locked instance %s" % self.op.instance_name
8876 pnode = instance.primary_node
8877 nodelist = list(instance.all_nodes)
8880 if self.op.os_name and not self.op.force:
8881 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8882 self.op.force_variant)
8883 instance_os = self.op.os_name
8885 instance_os = instance.os
8887 if self.op.disk_template:
8888 if instance.disk_template == self.op.disk_template:
8889 raise errors.OpPrereqError("Instance already has disk template %s" %
8890 instance.disk_template, errors.ECODE_INVAL)
8892 if (instance.disk_template,
8893 self.op.disk_template) not in self._DISK_CONVERSIONS:
8894 raise errors.OpPrereqError("Unsupported disk template conversion from"
8895 " %s to %s" % (instance.disk_template,
8896 self.op.disk_template),
8898 _CheckInstanceDown(self, instance, "cannot change disk template")
8899 if self.op.disk_template in constants.DTS_NET_MIRROR:
8900 if self.op.remote_node == pnode:
8901 raise errors.OpPrereqError("Given new secondary node %s is the same"
8902 " as the primary node of the instance" %
8903 self.op.remote_node, errors.ECODE_STATE)
8904 _CheckNodeOnline(self, self.op.remote_node)
8905 _CheckNodeNotDrained(self, self.op.remote_node)
8906 disks = [{"size": d.size} for d in instance.disks]
8907 required = _ComputeDiskSize(self.op.disk_template, disks)
8908 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8910 # hvparams processing
8911 if self.op.hvparams:
8912 hv_type = instance.hypervisor
8913 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8914 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8915 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8918 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8919 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8920 self.hv_new = hv_new # the new actual values
8921 self.hv_inst = i_hvdict # the new dict (without defaults)
8923 self.hv_new = self.hv_inst = {}
8925 # beparams processing
8926 if self.op.beparams:
8927 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8929 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8930 be_new = cluster.SimpleFillBE(i_bedict)
8931 self.be_new = be_new # the new actual values
8932 self.be_inst = i_bedict # the new dict (without defaults)
8934 self.be_new = self.be_inst = {}
8936 # osparams processing
8937 if self.op.osparams:
8938 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8939 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8940 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8941 self.os_inst = i_osdict # the new dict (without defaults)
8943 self.os_new = self.os_inst = {}
8947 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8948 mem_check_list = [pnode]
8949 if be_new[constants.BE_AUTO_BALANCE]:
8950 # either we changed auto_balance to yes or it was from before
8951 mem_check_list.extend(instance.secondary_nodes)
8952 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8953 instance.hypervisor)
8954 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8955 instance.hypervisor)
8956 pninfo = nodeinfo[pnode]
8957 msg = pninfo.fail_msg
8959 # Assume the primary node is unreachable and go ahead
8960 self.warn.append("Can't get info from primary node %s: %s" %
8962 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8963 self.warn.append("Node data from primary node %s doesn't contain"
8964 " free memory information" % pnode)
8965 elif instance_info.fail_msg:
8966 self.warn.append("Can't get instance runtime information: %s" %
8967 instance_info.fail_msg)
8969 if instance_info.payload:
8970 current_mem = int(instance_info.payload['memory'])
8972 # Assume instance not running
8973 # (there is a slight race condition here, but it's not very probable,
8974 # and we have no other way to check)
8976 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8977 pninfo.payload['memory_free'])
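# Worked example with made-up numbers: asking for BE_MEMORY = 4096 MB while
# the instance currently uses 1024 MB and the primary node reports 2048 MB
# free gives miss_mem = 4096 - 1024 - 2048 = 1024 > 0, so the change is
# rejected below.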
8979 raise errors.OpPrereqError("This change will prevent the instance"
8980 " from starting, due to %d MB of memory"
8981 " missing on its primary node" % miss_mem,
8984 if be_new[constants.BE_AUTO_BALANCE]:
8985 for node, nres in nodeinfo.items():
8986 if node not in instance.secondary_nodes:
8990 self.warn.append("Can't get info from secondary node %s: %s" %
8992 elif not isinstance(nres.payload.get('memory_free', None), int):
8993 self.warn.append("Secondary node %s didn't return free"
8994 " memory information" % node)
8995 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8996 self.warn.append("Not enough memory to failover instance to"
8997 " secondary node %s" % node)
9002 for nic_op, nic_dict in self.op.nics:
9003 if nic_op == constants.DDM_REMOVE:
9004 if not instance.nics:
9005 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9008 if nic_op != constants.DDM_ADD:
9010 if not instance.nics:
9011 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9012 " no NICs" % nic_op,
9014 if nic_op < 0 or nic_op >= len(instance.nics):
9015 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9017 (nic_op, len(instance.nics) - 1),
9019 old_nic_params = instance.nics[nic_op].nicparams
9020 old_nic_ip = instance.nics[nic_op].ip
9025 update_params_dict = dict([(key, nic_dict[key])
9026 for key in constants.NICS_PARAMETERS
9027 if key in nic_dict])
9029 if 'bridge' in nic_dict:
9030 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9032 new_nic_params = _GetUpdatedParams(old_nic_params,
9034 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9035 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9036 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9037 self.nic_pinst[nic_op] = new_nic_params
9038 self.nic_pnew[nic_op] = new_filled_nic_params
9039 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9041 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9042 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9043 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9045 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9047 self.warn.append(msg)
9049 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9050 if new_nic_mode == constants.NIC_MODE_ROUTED:
9051 if 'ip' in nic_dict:
9052 nic_ip = nic_dict['ip']
9056 raise errors.OpPrereqError('Cannot set the nic ip to None'
9057 ' on a routed nic', errors.ECODE_INVAL)
9058 if 'mac' in nic_dict:
9059 nic_mac = nic_dict['mac']
9061 raise errors.OpPrereqError('Cannot set the nic mac to None',
9063 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9064 # otherwise generate the mac
9065 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9067 # or validate/reserve the current one
9069 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9070 except errors.ReservationError:
9071 raise errors.OpPrereqError("MAC address %s already in use"
9072 " in cluster" % nic_mac,
9073 errors.ECODE_NOTUNIQUE)
9076 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9077 raise errors.OpPrereqError("Disk operations not supported for"
9078 " diskless instances",
9080 for disk_op, _ in self.op.disks:
9081 if disk_op == constants.DDM_REMOVE:
9082 if len(instance.disks) == 1:
9083 raise errors.OpPrereqError("Cannot remove the last disk of"
9084 " an instance", errors.ECODE_INVAL)
9085 _CheckInstanceDown(self, instance, "cannot remove disks")
9087 if (disk_op == constants.DDM_ADD and
9088 len(instance.disks) >= constants.MAX_DISKS):
9089 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9090 " add more" % constants.MAX_DISKS,
9092 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9094 if disk_op < 0 or disk_op >= len(instance.disks):
9095 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9097 (disk_op, len(instance.disks)),
9102 def _ConvertPlainToDrbd(self, feedback_fn):
9103 """Converts an instance from plain to drbd.
9106 feedback_fn("Converting template to drbd")
9107 instance = self.instance
9108 pnode = instance.primary_node
9109 snode = self.op.remote_node
9111 # create a fake disk info for _GenerateDiskTemplate
9112 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9113 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9114 instance.name, pnode, [snode],
9115 disk_info, None, None, 0)
9116 info = _GetInstanceInfoText(instance)
9117 feedback_fn("Creating aditional volumes...")
9118 # first, create the missing data and meta devices
9119 for disk in new_disks:
9120 # unfortunately this is... not too nice
9121 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9123 for child in disk.children:
9124 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9125 # at this stage, all new LVs have been created, we can rename the
9127 feedback_fn("Renaming original volumes...")
9128 rename_list = [(o, n.children[0].logical_id)
9129 for (o, n) in zip(instance.disks, new_disks)]
9130 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9131 result.Raise("Failed to rename original LVs")
9133 feedback_fn("Initializing DRBD devices...")
9134 # all child devices are in place, we can now create the DRBD devices
9135 for disk in new_disks:
9136 for node in [pnode, snode]:
9137 f_create = node == pnode
9138 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9140 # at this point, the instance has been modified
9141 instance.disk_template = constants.DT_DRBD8
9142 instance.disks = new_disks
9143 self.cfg.Update(instance, feedback_fn)
9145 # disks are created, waiting for sync
9146 disk_abort = not _WaitForSync(self, instance)
9148 raise errors.OpExecError("There are some degraded disks for"
9149 " this instance, please cleanup manually")
9151 def _ConvertDrbdToPlain(self, feedback_fn):
9152 """Converts an instance from drbd to plain.
9155 instance = self.instance
9156 assert len(instance.secondary_nodes) == 1
9157 pnode = instance.primary_node
9158 snode = instance.secondary_nodes[0]
9159 feedback_fn("Converting template to plain")
9161 old_disks = instance.disks
9162 new_disks = [d.children[0] for d in old_disks]
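# (for a DRBD8 disk, children[0] is the data LV and children[1] the metadata
# LV, so converting to plain keeps only the data volume)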
9164 # copy over size and mode
9165 for parent, child in zip(old_disks, new_disks):
9166 child.size = parent.size
9167 child.mode = parent.mode
9169 # update instance structure
9170 instance.disks = new_disks
9171 instance.disk_template = constants.DT_PLAIN
9172 self.cfg.Update(instance, feedback_fn)
9174 feedback_fn("Removing volumes on the secondary node...")
9175 for disk in old_disks:
9176 self.cfg.SetDiskID(disk, snode)
9177 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9179 self.LogWarning("Could not remove block device %s on node %s,"
9180 " continuing anyway: %s", disk.iv_name, snode, msg)
9182 feedback_fn("Removing unneeded volumes on the primary node...")
9183 for idx, disk in enumerate(old_disks):
9184 meta = disk.children[1]
9185 self.cfg.SetDiskID(meta, pnode)
9186 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9188 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9189 " continuing anyway: %s", idx, pnode, msg)
9192 def Exec(self, feedback_fn):
9193 """Modifies an instance.
9195 All parameters take effect only at the next restart of the instance.
9198 # Process here the warnings from CheckPrereq, as we don't have a
9199 # feedback_fn there.
9200 for warn in self.warn:
9201 feedback_fn("WARNING: %s" % warn)
9204 instance = self.instance
9206 for disk_op, disk_dict in self.op.disks:
9207 if disk_op == constants.DDM_REMOVE:
9208 # remove the last disk
9209 device = instance.disks.pop()
9210 device_idx = len(instance.disks)
9211 for node, disk in device.ComputeNodeTree(instance.primary_node):
9212 self.cfg.SetDiskID(disk, node)
9213 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9215 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9216 " continuing anyway", device_idx, node, msg)
9217 result.append(("disk/%d" % device_idx, "remove"))
9218 elif disk_op == constants.DDM_ADD:
9220 if instance.disk_template == constants.DT_FILE:
9221 file_driver, file_path = instance.disks[0].logical_id
9222 file_path = os.path.dirname(file_path)
9224 file_driver = file_path = None
9225 disk_idx_base = len(instance.disks)
9226 new_disk = _GenerateDiskTemplate(self,
9227 instance.disk_template,
9228 instance.name, instance.primary_node,
9229 instance.secondary_nodes,
9234 instance.disks.append(new_disk)
9235 info = _GetInstanceInfoText(instance)
9237 logging.info("Creating volume %s for instance %s",
9238 new_disk.iv_name, instance.name)
9239 # Note: this needs to be kept in sync with _CreateDisks
9241 for node in instance.all_nodes:
9242 f_create = node == instance.primary_node
9244 _CreateBlockDev(self, node, instance, new_disk,
9245 f_create, info, f_create)
9246 except errors.OpExecError, err:
9247 self.LogWarning("Failed to create volume %s (%s) on"
9249 new_disk.iv_name, new_disk, node, err)
9250 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9251 (new_disk.size, new_disk.mode)))
9253 # change a given disk
9254 instance.disks[disk_op].mode = disk_dict['mode']
9255 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9257 if self.op.disk_template:
9258 r_shut = _ShutdownInstanceDisks(self, instance)
9260 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9261 " proceed with disk template conversion")
9262 mode = (instance.disk_template, self.op.disk_template)
9264 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9266 self.cfg.ReleaseDRBDMinors(instance.name)
9268 result.append(("disk_template", self.op.disk_template))
9271 for nic_op, nic_dict in self.op.nics:
9272 if nic_op == constants.DDM_REMOVE:
9273 # remove the last nic
9274 del instance.nics[-1]
9275 result.append(("nic.%d" % len(instance.nics), "remove"))
9276 elif nic_op == constants.DDM_ADD:
9277 # mac and bridge should be set by now
9278 mac = nic_dict['mac']
9279 ip = nic_dict.get('ip', None)
9280 nicparams = self.nic_pinst[constants.DDM_ADD]
9281 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9282 instance.nics.append(new_nic)
9283 result.append(("nic.%d" % (len(instance.nics) - 1),
9284 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9285 (new_nic.mac, new_nic.ip,
9286 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9287 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9290 for key in 'mac', 'ip':
9292 setattr(instance.nics[nic_op], key, nic_dict[key])
9293 if nic_op in self.nic_pinst:
9294 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9295 for key, val in nic_dict.iteritems():
9296 result.append(("nic.%s/%d" % (key, nic_op), val))
9299 if self.op.hvparams:
9300 instance.hvparams = self.hv_inst
9301 for key, val in self.op.hvparams.iteritems():
9302 result.append(("hv/%s" % key, val))
9305 if self.op.beparams:
9306 instance.beparams = self.be_inst
9307 for key, val in self.op.beparams.iteritems():
9308 result.append(("be/%s" % key, val))
9312 instance.os = self.op.os_name
9315 if self.op.osparams:
9316 instance.osparams = self.os_inst
9317 for key, val in self.op.osparams.iteritems():
9318 result.append(("os/%s" % key, val))
9320 self.cfg.Update(instance, feedback_fn)
9324 _DISK_CONVERSIONS = {
9325 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9326 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9330 class LUQueryExports(NoHooksLU):
9331 """Query the exports list
9335 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9336 ("use_locking", False, _TBool),
9340 def ExpandNames(self):
9341 self.needed_locks = {}
9342 self.share_locks[locking.LEVEL_NODE] = 1
9343 if not self.op.nodes:
9344 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9346 self.needed_locks[locking.LEVEL_NODE] = \
9347 _GetWantedNodes(self, self.op.nodes)
9349 def Exec(self, feedback_fn):
9350 """Compute the list of all the exported system images.
9353 @return: a dictionary with the structure node->(export-list)
9354 where export-list is a list of the instances exported on
9358 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9359 rpcresult = self.rpc.call_export_list(self.nodes)
9361 for node in rpcresult:
9362 if rpcresult[node].fail_msg:
9363 result[node] = False
9365 result[node] = rpcresult[node].payload
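# Illustrative result (names made up); False marks nodes that could not be
# queried:
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}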
9370 class LUPrepareExport(NoHooksLU):
9371 """Prepares an instance for an export and returns useful information.
9376 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9380 def ExpandNames(self):
9381 self._ExpandAndLockInstance()
9383 def CheckPrereq(self):
9384 """Check prerequisites.
9387 instance_name = self.op.instance_name
9389 self.instance = self.cfg.GetInstanceInfo(instance_name)
9390 assert self.instance is not None, \
9391 "Cannot retrieve locked instance %s" % self.op.instance_name
9392 _CheckNodeOnline(self, self.instance.primary_node)
9394 self._cds = _GetClusterDomainSecret()
9396 def Exec(self, feedback_fn):
9397 """Prepares an instance for an export.
9400 instance = self.instance
9402 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9403 salt = utils.GenerateSecret(8)
9405 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9406 result = self.rpc.call_x509_cert_create(instance.primary_node,
9407 constants.RIE_CERT_VALIDITY)
9408 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9410 (name, cert_pem) = result.payload
9412 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9416 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9417 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9419 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9425 class LUExportInstance(LogicalUnit):
9426 """Export an instance to an image in the cluster.
9429 HPATH = "instance-export"
9430 HTYPE = constants.HTYPE_INSTANCE
9433 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9434 ("shutdown", True, _TBool),
9436 ("remove_instance", False, _TBool),
9437 ("ignore_remove_failures", False, _TBool),
9438 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9439 ("x509_key_name", None, _TOr(_TList, _TNone)),
9440 ("destination_x509_ca", None, _TMaybeString),
9444 def CheckArguments(self):
9445 """Check the arguments.
9448 self.x509_key_name = self.op.x509_key_name
9449 self.dest_x509_ca_pem = self.op.destination_x509_ca
9451 if self.op.remove_instance and not self.op.shutdown:
9452 raise errors.OpPrereqError("Can not remove instance without shutting it"
9455 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9456 if not self.x509_key_name:
9457 raise errors.OpPrereqError("Missing X509 key name for encryption",
9460 if not self.dest_x509_ca_pem:
9461 raise errors.OpPrereqError("Missing destination X509 CA",
9464 def ExpandNames(self):
9465 self._ExpandAndLockInstance()
9467 # Lock all nodes for local exports
9468 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9469 # FIXME: lock only instance primary and destination node
9471 # Sad but true, for now we have to lock all nodes, as we don't know where
9472 # the previous export might be, and in this LU we search for it and
9473 # remove it from its current node. In the future we could fix this by:
9474 # - making a tasklet to search (share-lock all), then create the
9475 # new one, then one to remove, after
9476 # - removing the removal operation altogether
9477 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9479 def DeclareLocks(self, level):
9480 """Last minute lock declaration."""
9481 # All nodes are locked anyway, so nothing to do here.
9483 def BuildHooksEnv(self):
9484 """Build hooks env.
9486 This will run on the master, primary node and target node.
9488 """
9489 env = {
9490 "EXPORT_MODE": self.op.mode,
9491 "EXPORT_NODE": self.op.target_node,
9492 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9493 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9494 # TODO: Generic function for boolean env variables
9495 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9496 }
9498 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9500 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9502 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9503 nl.append(self.op.target_node)
9505 return env, nl, nl
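# As with other LUs, the keys above reach hook scripts as GANETI_-prefixed
# environment variables (e.g. GANETI_EXPORT_MODE), together with the
# per-instance variables added by _BuildInstanceHookEnvByObject.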
9507 def CheckPrereq(self):
9508 """Check prerequisites.
9510 This checks that the instance and node names are valid.
9513 instance_name = self.op.instance_name
9515 self.instance = self.cfg.GetInstanceInfo(instance_name)
9516 assert self.instance is not None, \
9517 "Cannot retrieve locked instance %s" % self.op.instance_name
9518 _CheckNodeOnline(self, self.instance.primary_node)
9520 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9521 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9522 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9523 assert self.dst_node is not None
9525 _CheckNodeOnline(self, self.dst_node.name)
9526 _CheckNodeNotDrained(self, self.dst_node.name)
9529 self.dest_disk_info = None
9530 self.dest_x509_ca = None
9532 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9533 self.dst_node = None
9535 if len(self.op.target_node) != len(self.instance.disks):
9536 raise errors.OpPrereqError(("Received destination information for %s"
9537 " disks, but instance %s has %s disks") %
9538 (len(self.op.target_node), instance_name,
9539 len(self.instance.disks)),
9542 cds = _GetClusterDomainSecret()
9544 # Check X509 key name
9545 try:
9546 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9547 except (TypeError, ValueError), err:
9548 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9550 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9551 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9554 # Load and verify CA
9555 try:
9556 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9557 except OpenSSL.crypto.Error, err:
9558 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9559 (err, ), errors.ECODE_INVAL)
9561 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9562 if errcode is not None:
9563 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9564 (msg, ), errors.ECODE_INVAL)
9566 self.dest_x509_ca = cert
9568 # Verify target information
9569 disk_info = []
9570 for idx, disk_data in enumerate(self.op.target_node):
9571 try:
9572 (host, port, magic) = \
9573 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9574 except errors.GenericError, err:
9575 raise errors.OpPrereqError("Target info for disk %s: %s" %
9576 (idx, err), errors.ECODE_INVAL)
9578 disk_info.append((host, port, magic))
9580 assert len(disk_info) == len(self.op.target_node)
9581 self.dest_disk_info = disk_info
9583 else:
9584 raise errors.ProgrammerError("Unhandled export mode %r" %
9585 self.op.mode)
9587 # instance disk type verification
9588 # TODO: Implement export support for file-based disks
9589 for disk in self.instance.disks:
9590 if disk.dev_type == constants.LD_FILE:
9591 raise errors.OpPrereqError("Export not supported for instances with"
9592 " file-based disks", errors.ECODE_INVAL)
9594 def _CleanupExports(self, feedback_fn):
9595 """Removes exports of current instance from all other nodes.
9597 If an instance in a cluster with nodes A..D was exported to node C, its
9598 exports will be removed from the nodes A, B and D.
9601 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9603 nodelist = self.cfg.GetNodeList()
9604 nodelist.remove(self.dst_node.name)
9606 # on one-node clusters nodelist will be empty after the removal
9607 # if we proceed the backup would be removed because OpQueryExports
9608 # substitutes an empty list with the full cluster node list.
9609 iname = self.instance.name
9610 if nodelist:
9611 feedback_fn("Removing old exports for instance %s" % iname)
9612 exportlist = self.rpc.call_export_list(nodelist)
9613 for node in exportlist:
9614 if exportlist[node].fail_msg:
9615 continue
9616 if iname in exportlist[node].payload:
9617 msg = self.rpc.call_export_remove(node, iname).fail_msg
9618 if msg:
9619 self.LogWarning("Could not remove older export for instance %s"
9620 " on node %s: %s", iname, node, msg)
9622 def Exec(self, feedback_fn):
9623 """Export an instance to an image in the cluster.
9626 assert self.op.mode in constants.EXPORT_MODES
9628 instance = self.instance
9629 src_node = instance.primary_node
9631 if self.op.shutdown:
9632 # shutdown the instance, but not the disks
9633 feedback_fn("Shutting down instance %s" % instance.name)
9634 result = self.rpc.call_instance_shutdown(src_node, instance,
9635 self.op.shutdown_timeout)
9636 # TODO: Maybe ignore failures if ignore_remove_failures is set
9637 result.Raise("Could not shutdown instance %s on"
9638 " node %s" % (instance.name, src_node))
9640 # set the disks ID correctly since call_instance_start needs the
9641 # correct drbd minor to create the symlinks
9642 for disk in instance.disks:
9643 self.cfg.SetDiskID(disk, src_node)
9645 activate_disks = (not instance.admin_up)
9647 if activate_disks:
9648 # Activate the instance disks if we're exporting a stopped instance
9649 feedback_fn("Activating disks for %s" % instance.name)
9650 _StartInstanceDisks(self, instance, None)
9652 try:
9653 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9654 instance)
9656 helper.CreateSnapshots()
9657 try:
9658 if (self.op.shutdown and instance.admin_up and
9659 not self.op.remove_instance):
9660 assert not activate_disks
9661 feedback_fn("Starting instance %s" % instance.name)
9662 result = self.rpc.call_instance_start(src_node, instance, None, None)
9663 msg = result.fail_msg
9664 if msg:
9665 feedback_fn("Failed to start instance: %s" % msg)
9666 _ShutdownInstanceDisks(self, instance)
9667 raise errors.OpExecError("Could not start instance: %s" % msg)
9669 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9670 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9671 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9672 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9673 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9675 (key_name, _, _) = self.x509_key_name
9677 dest_ca_pem = \
9678 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9679 self.dest_x509_ca)
9681 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9682 key_name, dest_ca_pem,
9683 timeouts)
9684 finally:
9685 helper.Cleanup()
9687 # Check for backwards compatibility
9688 assert len(dresults) == len(instance.disks)
9689 assert compat.all(isinstance(i, bool) for i in dresults), \
9690 "Not all results are boolean: %r" % dresults
9694 feedback_fn("Deactivating disks for %s" % instance.name)
9695 _ShutdownInstanceDisks(self, instance)
9697 if not (compat.all(dresults) and fin_resu):
9698 failures = []
9699 if not fin_resu:
9700 failures.append("export finalization")
9701 if not compat.all(dresults):
9702 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9703 if not dsk)
9704 failures.append("disk export: disk(s) %s" % fdsk)
9706 raise errors.OpExecError("Export failed, errors in %s" %
9707 utils.CommaJoin(failures))
9709 # At this point, the export was successful, we can cleanup/finish
9711 # Remove instance if requested
9712 if self.op.remove_instance:
9713 feedback_fn("Removing instance %s" % instance.name)
9714 _RemoveInstance(self, feedback_fn, instance,
9715 self.op.ignore_remove_failures)
9717 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9718 self._CleanupExports(feedback_fn)
9720 return fin_resu, dresults
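# fin_resu reports whether export finalization succeeded; dresults holds one
# boolean per instance disk (see the asserts above). Callers need both to
# decide whether the export as a whole worked.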
9723 class LURemoveExport(NoHooksLU):
9724 """Remove exports related to the named instance.
9732 def ExpandNames(self):
9733 self.needed_locks = {}
9734 # We need all nodes to be locked in order for RemoveExport to work, but we
9735 # don't need to lock the instance itself, as nothing will happen to it (and
9736 # we can remove exports also for a removed instance)
9737 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9739 def Exec(self, feedback_fn):
9740 """Remove any export.
9743 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9744 # If the instance was not found we'll try with the name that was passed in.
9745 # This will only work if it was an FQDN, though.
9746 fqdn_warn = False
9747 if not instance_name:
9748 fqdn_warn = True
9749 instance_name = self.op.instance_name
9751 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9752 exportlist = self.rpc.call_export_list(locked_nodes)
9753 found = False
9754 for node in exportlist:
9755 msg = exportlist[node].fail_msg
9756 if msg:
9757 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9758 continue
9759 if instance_name in exportlist[node].payload:
9760 found = True
9761 result = self.rpc.call_export_remove(node, instance_name)
9762 msg = result.fail_msg
9763 if msg:
9764 logging.error("Could not remove export for instance %s"
9765 " on node %s: %s", instance_name, node, msg)
9767 if fqdn_warn and not found:
9768 feedback_fn("Export not found. If trying to remove an export belonging"
9769 " to a deleted instance please use its Fully Qualified"
9773 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9776 This is an abstract class which is the parent of all the other tags LUs.
9780 def ExpandNames(self):
9781 self.needed_locks = {}
9782 if self.op.kind == constants.TAG_NODE:
9783 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9784 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9785 elif self.op.kind == constants.TAG_INSTANCE:
9786 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9787 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9789 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
9790 # not possible to acquire the BGL based on opcode parameters)
9792 def CheckPrereq(self):
9793 """Check prerequisites.
9796 if self.op.kind == constants.TAG_CLUSTER:
9797 self.target = self.cfg.GetClusterInfo()
9798 elif self.op.kind == constants.TAG_NODE:
9799 self.target = self.cfg.GetNodeInfo(self.op.name)
9800 elif self.op.kind == constants.TAG_INSTANCE:
9801 self.target = self.cfg.GetInstanceInfo(self.op.name)
9802 else:
9803 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9804 str(self.op.kind), errors.ECODE_INVAL)
9807 class LUGetTags(TagsLU):
9808 """Returns the tags of a given object.
9812 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9813 # Name is only meaningful for nodes and instances
9814 ("name", _NoDefault, _TMaybeString),
9818 def ExpandNames(self):
9819 TagsLU.ExpandNames(self)
9821 # Share locks as this is only a read operation
9822 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9824 def Exec(self, feedback_fn):
9825 """Returns the tag list.
9828 return list(self.target.GetTags())
9831 class LUSearchTags(NoHooksLU):
9832 """Searches the tags for a given pattern.
9836 ("pattern", _NoDefault, _TNonEmptyString),
9840 def ExpandNames(self):
9841 self.needed_locks = {}
9843 def CheckPrereq(self):
9844 """Check prerequisites.
9846 This checks the pattern passed for validity by compiling it.
9849 try:
9850 self.re = re.compile(self.op.pattern)
9851 except re.error, err:
9852 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9853 (self.op.pattern, err), errors.ECODE_INVAL)
9855 def Exec(self, feedback_fn):
9856 """Returns the tag list.
9859 cfg = self.cfg
9860 tgts = [("/cluster", cfg.GetClusterInfo())]
9861 ilist = cfg.GetAllInstancesInfo().values()
9862 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9863 nlist = cfg.GetAllNodesInfo().values()
9864 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9865 results = []
9866 for path, target in tgts:
9867 for tag in target.GetTags():
9868 if self.re.search(tag):
9869 results.append((path, tag))
9870 return results
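# Illustrative result (hypothetical names): a list of (path, tag) tuples such
# as [("/instances/inst1.example.com", "web"), ("/nodes/node1", "web")].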
9873 class LUAddTags(TagsLU):
9874 """Sets a tag on a given object.
9878 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9879 # Name is only meaningful for nodes and instances
9880 ("name", _NoDefault, _TMaybeString),
9881 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9885 def CheckPrereq(self):
9886 """Check prerequisites.
9888 This checks the type and length of the tag name and value.
9891 TagsLU.CheckPrereq(self)
9892 for tag in self.op.tags:
9893 objects.TaggableObject.ValidateTag(tag)
9895 def Exec(self, feedback_fn):
9896 """Sets the tag.
9898 """
9899 try:
9900 for tag in self.op.tags:
9901 self.target.AddTag(tag)
9902 except errors.TagError, err:
9903 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9904 self.cfg.Update(self.target, feedback_fn)
9907 class LUDelTags(TagsLU):
9908 """Delete a list of tags from a given object.
9912 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9913 # Name is only meaningful for nodes and instances
9914 ("name", _NoDefault, _TMaybeString),
9915 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9919 def CheckPrereq(self):
9920 """Check prerequisites.
9922 This checks that we have the given tag.
9925 TagsLU.CheckPrereq(self)
9926 for tag in self.op.tags:
9927 objects.TaggableObject.ValidateTag(tag)
9928 del_tags = frozenset(self.op.tags)
9929 cur_tags = self.target.GetTags()
9930 if not del_tags <= cur_tags:
9931 diff_tags = del_tags - cur_tags
9932 diff_names = ["'%s'" % tag for tag in diff_tags]
9934 raise errors.OpPrereqError("Tag(s) %s not found" %
9935 (",".join(diff_names)), errors.ECODE_NOENT)
9937 def Exec(self, feedback_fn):
9938 """Remove the tag from the object.
9941 for tag in self.op.tags:
9942 self.target.RemoveTag(tag)
9943 self.cfg.Update(self.target, feedback_fn)
9946 class LUTestDelay(NoHooksLU):
9947 """Sleep for a specified amount of time.
9949 This LU sleeps on the master and/or nodes for a specified amount of
9950 time.
9954 ("duration", _NoDefault, _TFloat),
9955 ("on_master", True, _TBool),
9956 ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9957 ("repeat", 0, _TPositiveInt)
9961 def ExpandNames(self):
9962 """Expand names and set required locks.
9964 This expands the node list, if any.
9967 self.needed_locks = {}
9968 if self.op.on_nodes:
9969 # _GetWantedNodes can be used here, but is not always appropriate to use
9970 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9971 # more information.
9972 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9973 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9975 def _TestDelay(self):
9976 """Do the actual sleep.
9979 if self.op.on_master:
9980 if not utils.TestDelay(self.op.duration):
9981 raise errors.OpExecError("Error during master delay test")
9982 if self.op.on_nodes:
9983 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9984 for node, node_result in result.items():
9985 node_result.Raise("Failure during rpc call to node %s" % node)
9987 def Exec(self, feedback_fn):
9988 """Execute the test delay opcode, with the wanted repetitions.
9991 if self.op.repeat == 0:
9992 self._TestDelay()
9993 else:
9994 top_value = self.op.repeat - 1
9995 for i in range(self.op.repeat):
9996 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9997 self._TestDelay()
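# With repeat == 0 the delay runs exactly once; with repeat == N it runs N
# times, logging the iteration counter before each run.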
10000 class LUTestJobqueue(NoHooksLU):
10001 """Utility LU to test some aspects of the job queue.
10005 ("notify_waitlock", False, _TBool),
10006 ("notify_exec", False, _TBool),
10007 ("log_messages", _EmptyList, _TListOf(_TString)),
10008 ("fail", False, _TBool),
10012 # Must be lower than default timeout for WaitForJobChange to see whether it
10013 # notices changed jobs
10014 _CLIENT_CONNECT_TIMEOUT = 20.0
10015 _CLIENT_CONFIRM_TIMEOUT = 60.0
10018 def _NotifyUsingSocket(cls, cb, errcls):
10019 """Opens a Unix socket and waits for another program to connect.
10022 @param cb: Callback to send socket name to client
10023 @type errcls: class
10024 @param errcls: Exception class to use for errors
10027 # Using a temporary directory as there's no easy way to create temporary
10028 # sockets without writing a custom loop around tempfile.mktemp and
10029 # socket.bind
10030 tmpdir = tempfile.mkdtemp()
10031 try:
10032 tmpsock = utils.PathJoin(tmpdir, "sock")
10034 logging.debug("Creating temporary socket at %s", tmpsock)
10035 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10036 try:
10037 sock.bind(tmpsock)
10038 sock.listen(1)
10040 # Send details to client
10041 cb(tmpsock)
10043 # Wait for client to connect before continuing
10044 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10045 try:
10046 (conn, _) = sock.accept()
10047 except socket.error, err:
10048 raise errcls("Client didn't connect in time (%s)" % err)
10049 finally:
10050 sock.close()
10051 finally:
10052 # Remove as soon as client is connected
10053 shutil.rmtree(tmpdir)
10055 # Wait for client to close
10056 try:
10057 try:
10058 # pylint: disable-msg=E1101
10059 # Instance of '_socketobject' has no ... member
10060 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10061 conn.recv(1)
10062 except socket.error, err:
10063 raise errcls("Client failed to confirm notification (%s)" % err)
10064 finally:
10065 conn.close()
10067 def _SendNotification(self, test, arg, sockname):
10068 """Sends a notification to the client.
10071 @param test: Test name
10072 @param arg: Test argument (depends on test)
10073 @type sockname: string
10074 @param sockname: Socket path
10077 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10079 def _Notify(self, prereq, test, arg):
10080 """Notifies the client of a test.
10083 @param prereq: Whether this is a prereq-phase test
10085 @param test: Test name
10086 @param arg: Test argument (depends on test)
10088 """
10089 if prereq:
10090 errcls = errors.OpPrereqError
10091 else:
10092 errcls = errors.OpExecError
10094 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10095 test, arg),
10096 errcls)
10098 def CheckArguments(self):
10099 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10100 self.expandnames_calls = 0
10102 def ExpandNames(self):
10103 checkargs_calls = getattr(self, "checkargs_calls", 0)
10104 if checkargs_calls < 1:
10105 raise errors.ProgrammerError("CheckArguments was not called")
10107 self.expandnames_calls += 1
10109 if self.op.notify_waitlock:
10110 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10112 self.LogInfo("Expanding names")
10114 # Get lock on master node (just to get a lock, not for a particular reason)
10115 self.needed_locks = {
10116 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10117 }
10119 def Exec(self, feedback_fn):
10120 if self.expandnames_calls < 1:
10121 raise errors.ProgrammerError("ExpandNames was not called")
10123 if self.op.notify_exec:
10124 self._Notify(False, constants.JQT_EXEC, None)
10126 self.LogInfo("Executing")
10128 if self.op.log_messages:
10129 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10130 for idx, msg in enumerate(self.op.log_messages):
10131 self.LogInfo("Sending log message %s", idx + 1)
10132 feedback_fn(constants.JQT_MSGPREFIX + msg)
10133 # Report how many test messages have been sent
10134 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10136 if self.op.fail:
10137 raise errors.OpExecError("Opcode failure was requested")
10139 return True
10142 class IAllocator(object):
10143 """IAllocator framework.
10145 An IAllocator instance has three sets of attributes:
10146 - cfg that is needed to query the cluster
10147 - input data (all members of the _KEYS class attribute are required)
10148 - four buffer attributes (in|out_data|text), that represent the
10149 input (to the external script) in text and data structure format,
10150 and the output from it, again in two formats
10151 - the result variables from the script (success, info, result) for
10152 easy usage
10154 """
10155 # pylint: disable-msg=R0902
10156 # lots of instance attributes
10158 "name", "mem_size", "disks", "disk_template",
10159 "os", "tags", "nics", "vcpus", "hypervisor",
10162 "name", "relocate_from",
10168 def __init__(self, cfg, rpc, mode, **kwargs):
10169 self.cfg = cfg
10170 self.rpc = rpc
10171 # init buffer variables
10172 self.in_text = self.out_text = self.in_data = self.out_data = None
10173 # init all input fields so that pylint is happy
10174 self.mode = mode
10175 self.mem_size = self.disks = self.disk_template = None
10176 self.os = self.tags = self.nics = self.vcpus = None
10177 self.hypervisor = None
10178 self.relocate_from = None
10179 self.name = None
10180 self.evac_nodes = None
10181 # computed fields
10182 self.required_nodes = None
10183 # init result fields
10184 self.success = self.info = self.result = None
10185 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10186 keyset = self._ALLO_KEYS
10187 fn = self._AddNewInstance
10188 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10189 keyset = self._RELO_KEYS
10190 fn = self._AddRelocateInstance
10191 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10192 keyset = self._EVAC_KEYS
10193 fn = self._AddEvacuateNodes
10195 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10196 " IAllocator" % self.mode)
10197 for key in kwargs:
10198 if key not in keyset:
10199 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10200 " IAllocator" % key)
10201 setattr(self, key, kwargs[key])
10203 for key in keyset:
10204 if key not in kwargs:
10205 raise errors.ProgrammerError("Missing input parameter '%s' to"
10206 " IAllocator" % key)
10207 self._BuildInputData(fn)
10209 def _ComputeClusterData(self):
10210 """Compute the generic allocator input data.
10212 This is the data that is independent of the actual operation.
10215 cfg = self.cfg
10216 cluster_info = cfg.GetClusterInfo()
10217 # cluster data
10218 data = {
10219 "version": constants.IALLOCATOR_VERSION,
10220 "cluster_name": cfg.GetClusterName(),
10221 "cluster_tags": list(cluster_info.GetTags()),
10222 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10223 # we don't have job IDs
10224 }
10225 iinfo = cfg.GetAllInstancesInfo().values()
10226 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10228 # node data
10229 node_results = {}
10230 node_list = cfg.GetNodeList()
10232 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10233 hypervisor_name = self.hypervisor
10234 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10235 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10236 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10237 hypervisor_name = cluster_info.enabled_hypervisors[0]
10239 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10240 hypervisor_name)
10241 node_iinfo = \
10242 self.rpc.call_all_instances_info(node_list,
10243 cluster_info.enabled_hypervisors)
10244 for nname, nresult in node_data.items():
10245 # first fill in static (config-based) values
10246 ninfo = cfg.GetNodeInfo(nname)
10247 pnr = {
10248 "tags": list(ninfo.GetTags()),
10249 "primary_ip": ninfo.primary_ip,
10250 "secondary_ip": ninfo.secondary_ip,
10251 "offline": ninfo.offline,
10252 "drained": ninfo.drained,
10253 "master_candidate": ninfo.master_candidate,
10254 }
10256 if not (ninfo.offline or ninfo.drained):
10257 nresult.Raise("Can't get data for node %s" % nname)
10258 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10260 remote_info = nresult.payload
10262 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10263 'vg_size', 'vg_free', 'cpu_total']:
10264 if attr not in remote_info:
10265 raise errors.OpExecError("Node '%s' didn't return attribute"
10266 " '%s'" % (nname, attr))
10267 if not isinstance(remote_info[attr], int):
10268 raise errors.OpExecError("Node '%s' returned invalid value"
10270 (nname, attr, remote_info[attr]))
10271 # compute memory used by primary instances
10272 i_p_mem = i_p_up_mem = 0
10273 for iinfo, beinfo in i_list:
10274 if iinfo.primary_node == nname:
10275 i_p_mem += beinfo[constants.BE_MEMORY]
10276 if iinfo.name not in node_iinfo[nname].payload:
10277 i_used_mem = 0
10278 else:
10279 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10280 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10281 remote_info['memory_free'] -= max(0, i_mem_diff)
10283 if iinfo.admin_up:
10284 i_p_up_mem += beinfo[constants.BE_MEMORY]
10286 # compute memory used by instances
10287 pnr_dyn = {
10288 "total_memory": remote_info['memory_total'],
10289 "reserved_memory": remote_info['memory_dom0'],
10290 "free_memory": remote_info['memory_free'],
10291 "total_disk": remote_info['vg_size'],
10292 "free_disk": remote_info['vg_free'],
10293 "total_cpus": remote_info['cpu_total'],
10294 "i_pri_memory": i_p_mem,
10295 "i_pri_up_memory": i_p_up_mem,
10296 }
10297 pnr.update(pnr_dyn)
10299 node_results[nname] = pnr
10300 data["nodes"] = node_results
10302 # instance data
10303 instance_data = {}
10304 for iinfo, beinfo in i_list:
10305 nic_data = []
10306 for nic in iinfo.nics:
10307 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10308 nic_dict = {"mac": nic.mac,
10309 "ip": nic.ip,
10310 "mode": filled_params[constants.NIC_MODE],
10311 "link": filled_params[constants.NIC_LINK],
10312 }
10313 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10314 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10315 nic_data.append(nic_dict)
10316 pir = {
10317 "tags": list(iinfo.GetTags()),
10318 "admin_up": iinfo.admin_up,
10319 "vcpus": beinfo[constants.BE_VCPUS],
10320 "memory": beinfo[constants.BE_MEMORY],
10321 "os": iinfo.os,
10322 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10323 "nics": nic_data,
10324 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10325 "disk_template": iinfo.disk_template,
10326 "hypervisor": iinfo.hypervisor,
10327 }
10328 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10329 pir["disks"])
10330 instance_data[iinfo.name] = pir
10332 data["instances"] = instance_data
10334 self.in_data = data
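# At this point self.in_data holds the generic part of the request:
# "version", "cluster_name", "cluster_tags", "enabled_hypervisors", "nodes"
# and "instances"; the mode-specific "request" entry is added later by
# _BuildInputData.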
10336 def _AddNewInstance(self):
10337 """Add new instance data to allocator structure.
10339 This in combination with _ComputeClusterData will create the
10340 correct structure needed as input for the allocator.
10342 The checks for the completeness of the opcode must have already been
10346 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10348 if self.disk_template in constants.DTS_NET_MIRROR:
10349 self.required_nodes = 2
10350 else:
10351 self.required_nodes = 1
10352 request = {
10353 "name": self.name,
10354 "disk_template": self.disk_template,
10355 "tags": self.tags,
10356 "os": self.os,
10357 "vcpus": self.vcpus,
10358 "memory": self.mem_size,
10359 "disks": self.disks,
10360 "disk_space_total": disk_space,
10361 "nics": self.nics,
10362 "required_nodes": self.required_nodes,
10363 }
10364 return request
10366 def _AddRelocateInstance(self):
10367 """Add relocate instance data to allocator structure.
10369 This in combination with _ComputeClusterData will create the
10370 correct structure needed as input for the allocator.
10372 The checks for the completeness of the opcode must have already been
10376 instance = self.cfg.GetInstanceInfo(self.name)
10377 if instance is None:
10378 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10379 " IAllocator" % self.name)
10381 if instance.disk_template not in constants.DTS_NET_MIRROR:
10382 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10383 errors.ECODE_INVAL)
10385 if len(instance.secondary_nodes) != 1:
10386 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
10387 errors.ECODE_STATE)
10389 self.required_nodes = 1
10390 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10391 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10393 request = {
10394 "name": self.name,
10395 "disk_space_total": disk_space,
10396 "required_nodes": self.required_nodes,
10397 "relocate_from": self.relocate_from,
10398 }
10399 return request
10401 def _AddEvacuateNodes(self):
10402 """Add evacuate nodes data to allocator structure.
10406 "evac_nodes": self.evac_nodes
10410 def _BuildInputData(self, fn):
10411 """Build input data structures.
10414 self._ComputeClusterData()
10417 request["type"] = self.mode
10418 self.in_data["request"] = request
10420 self.in_text = serializer.Dump(self.in_data)
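# Illustrative (abridged) shape of the serialized input handed to the
# allocator script, with the request type taken from self.mode:
#   {"request": {"type": <mode>, ...}, "nodes": {...}, "instances": {...}, ...}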
10422 def Run(self, name, validate=True, call_fn=None):
10423 """Run an instance allocator and return the results.
10426 if call_fn is None:
10427 call_fn = self.rpc.call_iallocator_runner
10429 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10430 result.Raise("Failure while running the iallocator script")
10432 self.out_text = result.payload
10433 if validate:
10434 self._ValidateResult()
10436 def _ValidateResult(self):
10437 """Process the allocator results.
10439 This will process and if successful save the result in
10440 self.out_data and the other parameters.
10443 try:
10444 rdict = serializer.Load(self.out_text)
10445 except Exception, err:
10446 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10448 if not isinstance(rdict, dict):
10449 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10451 # TODO: remove backwards compatibility in later versions
10452 if "nodes" in rdict and "result" not in rdict:
10453 rdict["result"] = rdict["nodes"]
10454 del rdict["nodes"]
10456 for key in "success", "info", "result":
10457 if key not in rdict:
10458 raise errors.OpExecError("Can't parse iallocator results:"
10459 " missing key '%s'" % key)
10460 setattr(self, key, rdict[key])
10462 if not isinstance(rdict["result"], list):
10463 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10464 " is not a list")
10465 self.out_data = rdict
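# A minimal well-formed reply therefore looks like (illustrative values):
#   {"success": true, "info": "allocation successful", "result": ["node1"]}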
10468 class LUTestAllocator(NoHooksLU):
10469 """Run allocator tests.
10471 This LU runs the allocator tests
10475 ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10476 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10477 ("name", _NoDefault, _TNonEmptyString),
10478 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10479 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10480 _TOr(_TNone, _TNonEmptyString))))),
10481 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10482 ("hypervisor", None, _TMaybeString),
10483 ("allocator", None, _TMaybeString),
10484 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10485 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10486 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10487 ("os", None, _TMaybeString),
10488 ("disk_template", None, _TMaybeString),
10489 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10492 def CheckPrereq(self):
10493 """Check prerequisites.
10495 This checks the opcode parameters depending on the direction and mode test.
10498 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10499 for attr in ["mem_size", "disks", "disk_template",
10500 "os", "tags", "nics", "vcpus"]:
10501 if not hasattr(self.op, attr):
10502 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10503 attr, errors.ECODE_INVAL)
10504 iname = self.cfg.ExpandInstanceName(self.op.name)
10505 if iname is not None:
10506 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10507 iname, errors.ECODE_EXISTS)
10508 if not isinstance(self.op.nics, list):
10509 raise errors.OpPrereqError("Invalid parameter 'nics'",
10510 errors.ECODE_INVAL)
10511 if not isinstance(self.op.disks, list):
10512 raise errors.OpPrereqError("Invalid parameter 'disks'",
10513 errors.ECODE_INVAL)
10514 for row in self.op.disks:
10515 if (not isinstance(row, dict) or
10516 "size" not in row or
10517 not isinstance(row["size"], int) or
10518 "mode" not in row or
10519 row["mode"] not in ['r', 'w']):
10520 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10521 " parameter", errors.ECODE_INVAL)
10522 if self.op.hypervisor is None:
10523 self.op.hypervisor = self.cfg.GetHypervisorType()
10524 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10525 fname = _ExpandInstanceName(self.cfg, self.op.name)
10526 self.op.name = fname
10527 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10528 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10529 if not hasattr(self.op, "evac_nodes"):
10530 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10531 " opcode input", errors.ECODE_INVAL)
10532 else:
10533 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10534 self.op.mode, errors.ECODE_INVAL)
10536 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10537 if self.op.allocator is None:
10538 raise errors.OpPrereqError("Missing allocator name",
10539 errors.ECODE_INVAL)
10540 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10541 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10542 self.op.direction, errors.ECODE_INVAL)
10544 def Exec(self, feedback_fn):
10545 """Run the allocator test.
10548 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10549 ial = IAllocator(self.cfg, self.rpc,
10550 mode=self.op.mode,
10551 name=self.op.name,
10552 mem_size=self.op.mem_size,
10553 disks=self.op.disks,
10554 disk_template=self.op.disk_template,
10555 os=self.op.os,
10556 tags=self.op.tags,
10557 nics=self.op.nics,
10558 vcpus=self.op.vcpus,
10559 hypervisor=self.op.hypervisor,
10560 )
10561 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10562 ial = IAllocator(self.cfg, self.rpc,
10563 mode=self.op.mode,
10564 name=self.op.name,
10565 relocate_from=list(self.relocate_from),
10566 )
10567 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10568 ial = IAllocator(self.cfg, self.rpc,
10569 mode=self.op.mode,
10570 evac_nodes=self.op.evac_nodes)
10571 else:
10572 raise errors.ProgrammerError("Unhandled mode %s in"
10573 " LUTestAllocator.Exec" % self.op.mode)
10575 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10576 result = ial.in_text
10577 else:
10578 ial.Run(self.op.allocator, validate=False)
10579 result = ial.out_text
10580 return result