4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 import ganeti.masterd.instance # pylint: disable-msg=W0611
60 # Modifiable default values; need to define these here before the
64 """Returns an empty list.
71 """Returns an empty dict.
77 #: The without-default default value
81 #: The no-type (value to complex to check it in the type system)
87 """Checks if the given value is not None.
90 return val is not None
94 """Checks if the given value is None.
101 """Checks if the given value is a boolean.
104 return isinstance(val, bool)
108 """Checks if the given value is an integer.
111 return isinstance(val, int)
115 """Checks if the given value is a float.
118 return isinstance(val, float)
122 """Checks if the given value is a string.
125 return isinstance(val, basestring)
129 """Checks if a given value evaluates to a boolean True value.
135 def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list.
139 return lambda val: val in target_list
144 """Checks if the given value is a list.
147 return isinstance(val, list)
151 """Checks if the given value is a dictionary.
154 return isinstance(val, dict)
157 def _TIsLength(size):
158 """Check is the given container is of the given size.
161 return lambda container: len(container) == size
166 """Combine multiple functions using an AND operation.
170 return compat.all(t(val) for t in args)
175 """Combine multiple functions using an OR operation.
179 return compat.any(t(val) for t in args)
184 """Checks that a modified version of the argument passes the given test.
187 return lambda val: test(fn(val))
192 #: a non-empty string
193 _TNonEmptyString = _TAnd(_TString, _TTrue)
196 #: a maybe non-empty string
197 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
200 #: a maybe boolean (bool or none)
201 _TMaybeBool = _TOr(_TBool, _TNone)
204 #: a positive integer
205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
207 #: a strictly positive integer
208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
211 def _TListOf(my_type):
212 """Checks if a given value is a list with all elements of the same type.
216 lambda lst: compat.all(my_type(v) for v in lst))
219 def _TDictOf(key_type, val_type):
220 """Checks a dict type for the type of its key/values.
224 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225 and compat.all(val_type(v)
226 for v in my_dict.values())))
229 # Common opcode attributes
231 #: output fields for a query operation
232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
235 #: the shutdown timeout
236 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
239 #: the force parameter
240 _PForce = ("force", False, _TBool)
242 #: a required instance name (for single-instance LUs)
243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
246 #: a required node name (for single-node LUs)
247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
249 #: the migration type (live/non-live)
250 _PMigrationMode = ("mode", None, _TOr(_TNone,
251 _TElemOf(constants.HT_MIGRATION_MODES)))
253 #: the obsolete 'live' mode (boolean)
254 _PMigrationLive = ("live", None, _TMaybeBool)
258 class LogicalUnit(object):
259 """Logical Unit base class.
261 Subclasses must follow these rules:
262 - implement ExpandNames
263 - implement CheckPrereq (except when tasklets are used)
264 - implement Exec (except when tasklets are used)
265 - implement BuildHooksEnv
266 - redefine HPATH and HTYPE
267 - optionally redefine their run requirements:
268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
270 Note that all commands require root permissions.
272 @ivar dry_run_result: the value (if any) that will be returned to the caller
273 in dry-run mode (signalled by opcode dry_run parameter)
274 @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
275 they should get if not already defined, and types they must match
283 def __init__(self, processor, op, context, rpc):
284 """Constructor for LogicalUnit.
286 This needs to be overridden in derived classes in order to check op
290 self.proc = processor
292 self.cfg = context.cfg
293 self.context = context
295 # Dicts used to declare locking needs to mcpu
296 self.needed_locks = None
297 self.acquired_locks = {}
298 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
300 self.remove_locks = {}
301 # Used to force good behavior when calling helper functions
302 self.recalculate_locks = {}
305 self.Log = processor.Log # pylint: disable-msg=C0103
306 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
307 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
308 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
309 # support for dry-run
310 self.dry_run_result = None
311 # support for generic debug attribute
312 if (not hasattr(self.op, "debug_level") or
313 not isinstance(self.op.debug_level, int)):
314 self.op.debug_level = 0
319 # The new kind-of-type-system
320 op_id = self.op.OP_ID
321 for attr_name, aval, test in self._OP_PARAMS:
322 if not hasattr(op, attr_name):
323 if aval == _NoDefault:
324 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
325 (op_id, attr_name), errors.ECODE_INVAL)
331 setattr(self.op, attr_name, dval)
332 attr_val = getattr(op, attr_name)
336 if not callable(test):
337 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
338 " given type is not a proper type (%s)" %
339 (op_id, attr_name, test))
340 if not test(attr_val):
341 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
342 self.op.OP_ID, attr_name, type(attr_val), attr_val)
343 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
344 (op_id, attr_name), errors.ECODE_INVAL)
346 self.CheckArguments()
349 """Returns the SshRunner object
353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
356 ssh = property(fget=__GetSSH)
358 def CheckArguments(self):
359 """Check syntactic validity for the opcode arguments.
361 This method is for doing a simple syntactic check and ensure
362 validity of opcode parameters, without any cluster-related
363 checks. While the same can be accomplished in ExpandNames and/or
364 CheckPrereq, doing these separate is better because:
366 - ExpandNames is left as purely a lock-related function
367 - CheckPrereq is run after we have acquired locks (and possible
370 The function is allowed to change the self.op attribute so that
371 later methods can no longer worry about missing parameters.
376 def ExpandNames(self):
377 """Expand names for this LU.
379 This method is called before starting to execute the opcode, and it should
380 update all the parameters of the opcode to their canonical form (e.g. a
381 short node name must be fully expanded after this method has successfully
382 completed). This way locking, hooks, logging, ecc. can work correctly.
384 LUs which implement this method must also populate the self.needed_locks
385 member, as a dict with lock levels as keys, and a list of needed lock names
388 - use an empty dict if you don't need any lock
389 - if you don't need any lock at a particular level omit that level
390 - don't put anything for the BGL level
391 - if you want all locks at a level use locking.ALL_SET as a value
393 If you need to share locks (rather than acquire them exclusively) at one
394 level you can modify self.share_locks, setting a true value (usually 1) for
395 that level. By default locks are not shared.
397 This function can also define a list of tasklets, which then will be
398 executed in order instead of the usual LU-level CheckPrereq and Exec
399 functions, if those are not defined by the LU.
403 # Acquire all nodes and one instance
404 self.needed_locks = {
405 locking.LEVEL_NODE: locking.ALL_SET,
406 locking.LEVEL_INSTANCE: ['instance1.example.com'],
408 # Acquire just two nodes
409 self.needed_locks = {
410 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
413 self.needed_locks = {} # No, you can't leave it to the default value None
416 # The implementation of this method is mandatory only if the new LU is
417 # concurrent, so that old LUs don't need to be changed all at the same
420 self.needed_locks = {} # Exclusive LUs don't need locks.
422 raise NotImplementedError
424 def DeclareLocks(self, level):
425 """Declare LU locking needs for a level
427 While most LUs can just declare their locking needs at ExpandNames time,
428 sometimes there's the need to calculate some locks after having acquired
429 the ones before. This function is called just before acquiring locks at a
430 particular level, but after acquiring the ones at lower levels, and permits
431 such calculations. It can be used to modify self.needed_locks, and by
432 default it does nothing.
434 This function is only called if you have something already set in
435 self.needed_locks for the level.
437 @param level: Locking level which is going to be locked
438 @type level: member of ganeti.locking.LEVELS
442 def CheckPrereq(self):
443 """Check prerequisites for this LU.
445 This method should check that the prerequisites for the execution
446 of this LU are fulfilled. It can do internode communication, but
447 it should be idempotent - no cluster or system changes are
450 The method should raise errors.OpPrereqError in case something is
451 not fulfilled. Its return value is ignored.
453 This method should also update all the parameters of the opcode to
454 their canonical form if it hasn't been done by ExpandNames before.
457 if self.tasklets is not None:
458 for (idx, tl) in enumerate(self.tasklets):
459 logging.debug("Checking prerequisites for tasklet %s/%s",
460 idx + 1, len(self.tasklets))
465 def Exec(self, feedback_fn):
468 This method should implement the actual work. It should raise
469 errors.OpExecError for failures that are somewhat dealt with in
473 if self.tasklets is not None:
474 for (idx, tl) in enumerate(self.tasklets):
475 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
478 raise NotImplementedError
480 def BuildHooksEnv(self):
481 """Build hooks environment for this LU.
483 This method should return a three-node tuple consisting of: a dict
484 containing the environment that will be used for running the
485 specific hook for this LU, a list of node names on which the hook
486 should run before the execution, and a list of node names on which
487 the hook should run after the execution.
489 The keys of the dict must not have 'GANETI_' prefixed as this will
490 be handled in the hooks runner. Also note additional keys will be
491 added by the hooks runner. If the LU doesn't define any
492 environment, an empty dict (and not None) should be returned.
494 No nodes should be returned as an empty list (and not None).
496 Note that if the HPATH for a LU class is None, this function will
500 raise NotImplementedError
502 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503 """Notify the LU about the results of its hooks.
505 This method is called every time a hooks phase is executed, and notifies
506 the Logical Unit about the hooks' result. The LU can then use it to alter
507 its result based on the hooks. By default the method does nothing and the
508 previous result is passed back unchanged but any LU can define it if it
509 wants to use the local cluster hook-scripts somehow.
511 @param phase: one of L{constants.HOOKS_PHASE_POST} or
512 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
513 @param hook_results: the results of the multi-node hooks rpc call
514 @param feedback_fn: function used send feedback back to the caller
515 @param lu_result: the previous Exec result this LU had, or None
517 @return: the new Exec result, based on the previous result
521 # API must be kept, thus we ignore the unused argument and could
522 # be a function warnings
523 # pylint: disable-msg=W0613,R0201
526 def _ExpandAndLockInstance(self):
527 """Helper function to expand and lock an instance.
529 Many LUs that work on an instance take its name in self.op.instance_name
530 and need to expand it and then declare the expanded name for locking. This
531 function does it, and then updates self.op.instance_name to the expanded
532 name. It also initializes needed_locks as a dict, if this hasn't been done
536 if self.needed_locks is None:
537 self.needed_locks = {}
539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540 "_ExpandAndLockInstance called with instance-level locks set"
541 self.op.instance_name = _ExpandInstanceName(self.cfg,
542 self.op.instance_name)
543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
545 def _LockInstancesNodes(self, primary_only=False):
546 """Helper function to declare instances' nodes for locking.
548 This function should be called after locking one or more instances to lock
549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550 with all primary or secondary nodes for instances already locked and
551 present in self.needed_locks[locking.LEVEL_INSTANCE].
553 It should be called from DeclareLocks, and for safety only works if
554 self.recalculate_locks[locking.LEVEL_NODE] is set.
556 In the future it may grow parameters to just lock some instance's nodes, or
557 to just lock primaries or secondary nodes, if needed.
559 If should be called in DeclareLocks in a way similar to::
561 if level == locking.LEVEL_NODE:
562 self._LockInstancesNodes()
564 @type primary_only: boolean
565 @param primary_only: only lock primary nodes of locked instances
568 assert locking.LEVEL_NODE in self.recalculate_locks, \
569 "_LockInstancesNodes helper function called with no nodes to recalculate"
571 # TODO: check if we're really been called with the instance locks held
573 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
574 # future we might want to have different behaviors depending on the value
575 # of self.recalculate_locks[locking.LEVEL_NODE]
577 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578 instance = self.context.cfg.GetInstanceInfo(instance_name)
579 wanted_nodes.append(instance.primary_node)
581 wanted_nodes.extend(instance.secondary_nodes)
583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
588 del self.recalculate_locks[locking.LEVEL_NODE]
591 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592 """Simple LU which runs no hooks.
594 This LU is intended as a parent for other LogicalUnits which will
595 run no hooks, in order to reduce duplicate code.
601 def BuildHooksEnv(self):
602 """Empty BuildHooksEnv for NoHooksLu.
604 This just raises an error.
607 assert False, "BuildHooksEnv called for NoHooksLUs"
611 """Tasklet base class.
613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614 they can mix legacy code with tasklets. Locking needs to be done in the LU,
615 tasklets know nothing about locks.
617 Subclasses must follow these rules:
618 - Implement CheckPrereq
622 def __init__(self, lu):
629 def CheckPrereq(self):
630 """Check prerequisites for this tasklets.
632 This method should check whether the prerequisites for the execution of
633 this tasklet are fulfilled. It can do internode communication, but it
634 should be idempotent - no cluster or system changes are allowed.
636 The method should raise errors.OpPrereqError in case something is not
637 fulfilled. Its return value is ignored.
639 This method should also update all parameters to their canonical form if it
640 hasn't been done before.
645 def Exec(self, feedback_fn):
646 """Execute the tasklet.
648 This method should implement the actual work. It should raise
649 errors.OpExecError for failures that are somewhat dealt with in code, or
653 raise NotImplementedError
656 def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names.
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
662 @param nodes: list of node names or None for all nodes
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
669 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
670 " non-empty list of nodes whose name is to be expanded.")
672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673 return utils.NiceSort(wanted)
676 def _GetWantedInstances(lu, instances):
677 """Returns list of checked and expanded instance names.
679 @type lu: L{LogicalUnit}
680 @param lu: the logical unit on whose behalf we execute
681 @type instances: list
682 @param instances: list of instance names or None for all instances
684 @return: the list of instances, sorted
685 @raise errors.OpPrereqError: if the instances parameter is wrong type
686 @raise errors.OpPrereqError: if any of the passed instances is not found
690 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
692 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
696 def _GetUpdatedParams(old_params, update_dict,
697 use_default=True, use_none=False):
698 """Return the new version of a parameter dictionary.
700 @type old_params: dict
701 @param old_params: old parameters
702 @type update_dict: dict
703 @param update_dict: dict containing new parameter values, or
704 constants.VALUE_DEFAULT to reset the parameter to its default
706 @type use_default: boolean
707 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
708 values as 'to be deleted' values
709 @type use_none: boolean
710 @param use_none: whether to recognise C{None} values as 'to be
713 @return: the new parameter dictionary
716 params_copy = copy.deepcopy(old_params)
717 for key, val in update_dict.iteritems():
718 if ((use_default and val == constants.VALUE_DEFAULT) or
719 (use_none and val is None)):
725 params_copy[key] = val
729 def _CheckOutputFields(static, dynamic, selected):
730 """Checks whether all selected fields are valid.
732 @type static: L{utils.FieldSet}
733 @param static: static fields set
734 @type dynamic: L{utils.FieldSet}
735 @param dynamic: dynamic fields set
742 delta = f.NonMatching(selected)
744 raise errors.OpPrereqError("Unknown output fields selected: %s"
745 % ",".join(delta), errors.ECODE_INVAL)
748 def _CheckGlobalHvParams(params):
749 """Validates that given hypervisor params are not global ones.
751 This will ensure that instances don't get customised versions of
755 used_globals = constants.HVC_GLOBALS.intersection(params)
757 msg = ("The following hypervisor parameters are global and cannot"
758 " be customized at instance level, please modify them at"
759 " cluster level: %s" % utils.CommaJoin(used_globals))
760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
763 def _CheckNodeOnline(lu, node):
764 """Ensure that a given node is online.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @raise errors.OpPrereqError: if the node is offline
771 if lu.cfg.GetNodeInfo(node).offline:
772 raise errors.OpPrereqError("Can't use offline node %s" % node,
776 def _CheckNodeNotDrained(lu, node):
777 """Ensure that a given node is not drained.
779 @param lu: the LU on behalf of which we make the check
780 @param node: the node to check
781 @raise errors.OpPrereqError: if the node is drained
784 if lu.cfg.GetNodeInfo(node).drained:
785 raise errors.OpPrereqError("Can't use drained node %s" % node,
789 def _CheckNodeHasOS(lu, node, os_name, force_variant):
790 """Ensure that a node supports a given OS.
792 @param lu: the LU on behalf of which we make the check
793 @param node: the node to check
794 @param os_name: the OS to query about
795 @param force_variant: whether to ignore variant errors
796 @raise errors.OpPrereqError: if the node is not supporting the OS
799 result = lu.rpc.call_os_get(node, os_name)
800 result.Raise("OS '%s' not in supported OS list for node %s" %
802 prereq=True, ecode=errors.ECODE_INVAL)
803 if not force_variant:
804 _CheckOSVariant(result.payload, os_name)
807 def _RequireFileStorage():
808 """Checks that file storage is enabled.
810 @raise errors.OpPrereqError: when file storage is disabled
813 if not constants.ENABLE_FILE_STORAGE:
814 raise errors.OpPrereqError("File storage disabled at configure time",
def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  @param template: the disk template name to check
  @raise errors.OpPrereqError: if the template is not among
      constants.DISK_TEMPLATES, or if it is the file-based template
      while file storage is disabled

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  # the file template additionally requires file storage to be compiled in
  if template == constants.DT_FILE:
    _RequireFileStorage()
831 def _CheckStorageType(storage_type):
832 """Ensure a given storage type is valid.
835 if storage_type not in constants.VALID_STORAGE_TYPES:
836 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
838 if storage_type == constants.ST_FILE:
839 _RequireFileStorage()
843 def _GetClusterDomainSecret():
844 """Reads the cluster domain secret.
847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running.

  Checks both the configuration (admin_up flag) and the actual runtime
  state as reported by the primary node's hypervisor.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param reason: text appended to the error messages
  @raise errors.OpPrereqError: if the instance is marked up or is
      actually running on its primary node

  """
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
867 def _ExpandItemName(fn, name, kind):
868 """Expand an item name.
870 @param fn: the function to use for expansion
871 @param name: requested item name
872 @param kind: text description ('Node' or 'Instance')
873 @return: the resolved (full) name
874 @raise errors.OpPrereqError: if the item is not found
878 if full_name is None:
879 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
894 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
895 memory, vcpus, nics, disk_template, disks,
896 bep, hvp, hypervisor_name):
897 """Builds instance related env variables for hooks
899 This builds the hook environment from individual variables.
902 @param name: the name of the instance
903 @type primary_node: string
904 @param primary_node: the name of the instance's primary node
905 @type secondary_nodes: list
906 @param secondary_nodes: list of secondary nodes as strings
907 @type os_type: string
908 @param os_type: the name of the instance's OS
909 @type status: boolean
910 @param status: the should_run status of the instance
912 @param memory: the memory size of the instance
914 @param vcpus: the count of VCPUs the instance has
916 @param nics: list of tuples (ip, mac, mode, link) representing
917 the NICs the instance has
918 @type disk_template: string
919 @param disk_template: the disk template of the instance
921 @param disks: the list of (size, mode) pairs
923 @param bep: the backend parameters for the instance
925 @param hvp: the hypervisor parameters for the instance
926 @type hypervisor_name: string
927 @param hypervisor_name: the hypervisor for the instance
929 @return: the hook environment for this instance
938 "INSTANCE_NAME": name,
939 "INSTANCE_PRIMARY": primary_node,
940 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
941 "INSTANCE_OS_TYPE": os_type,
942 "INSTANCE_STATUS": str_status,
943 "INSTANCE_MEMORY": memory,
944 "INSTANCE_VCPUS": vcpus,
945 "INSTANCE_DISK_TEMPLATE": disk_template,
946 "INSTANCE_HYPERVISOR": hypervisor_name,
950 nic_count = len(nics)
951 for idx, (ip, mac, mode, link) in enumerate(nics):
954 env["INSTANCE_NIC%d_IP" % idx] = ip
955 env["INSTANCE_NIC%d_MAC" % idx] = mac
956 env["INSTANCE_NIC%d_MODE" % idx] = mode
957 env["INSTANCE_NIC%d_LINK" % idx] = link
958 if mode == constants.NIC_MODE_BRIDGED:
959 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
963 env["INSTANCE_NIC_COUNT"] = nic_count
966 disk_count = len(disks)
967 for idx, (size, mode) in enumerate(disks):
968 env["INSTANCE_DISK%d_SIZE" % idx] = size
969 env["INSTANCE_DISK%d_MODE" % idx] = mode
973 env["INSTANCE_DISK_COUNT"] = disk_count
975 for source, kind in [(bep, "BE"), (hvp, "HV")]:
976 for key, value in source.items():
977 env["INSTANCE_%s_%s" % (kind, key)] = value
982 def _NICListToTuple(lu, nics):
983 """Build a list of nic information tuples.
985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986 value in LUQueryInstanceData.
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type nics: list of L{objects.NIC}
991 @param nics: list of nics to convert to hooks tuples
995 cluster = lu.cfg.GetClusterInfo()
999 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000 mode = filled_params[constants.NIC_MODE]
1001 link = filled_params[constants.NIC_LINK]
1002 hooks_nics.append((ip, mac, mode, link))
1006 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007 """Builds instance related env variables for hooks from an object.
1009 @type lu: L{LogicalUnit}
1010 @param lu: the logical unit on whose behalf we execute
1011 @type instance: L{objects.Instance}
1012 @param instance: the instance for which we should build the
1014 @type override: dict
1015 @param override: dictionary with key/values that will override
1018 @return: the hook environment dictionary
1021 cluster = lu.cfg.GetClusterInfo()
1022 bep = cluster.FillBE(instance)
1023 hvp = cluster.FillHV(instance)
1025 'name': instance.name,
1026 'primary_node': instance.primary_node,
1027 'secondary_nodes': instance.secondary_nodes,
1028 'os_type': instance.os,
1029 'status': instance.admin_up,
1030 'memory': bep[constants.BE_MEMORY],
1031 'vcpus': bep[constants.BE_VCPUS],
1032 'nics': _NICListToTuple(lu, instance.nics),
1033 'disk_template': instance.disk_template,
1034 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1037 'hypervisor_name': instance.hypervisor,
1040 args.update(override)
1041 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1044 def _AdjustCandidatePool(lu, exceptions):
1045 """Adjust the candidate pool after node operations.
1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1050 lu.LogInfo("Promoted nodes to master candidate role: %s",
1051 utils.CommaJoin(node.name for node in mod_list))
1052 for name in mod_list:
1053 lu.context.ReaddNode(name)
1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1056 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1060 def _DecideSelfPromotion(lu, exceptions=None):
1061 """Decide whether I should promote myself as a master candidate.
1064 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066 # the new node will increase mc_max with one, so:
1067 mc_should = min(mc_should + 1, cp_size)
1068 return mc_now < mc_should
1071 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072 """Check that the bridges needed by a list of nics exist.
1075 cluster = lu.cfg.GetClusterInfo()
1076 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077 brlist = [params[constants.NIC_LINK] for params in paramslist
1078 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1080 result = lu.rpc.call_bridges_exist(target_node, brlist)
1081 result.Raise("Error checking bridges on destination node '%s'" %
1082 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1085 def _CheckInstanceBridgesExist(lu, instance, node=None):
1086 """Check that the bridges needed by an instance exist.
1090 node = instance.primary_node
1091 _CheckNicsBridgesExist(lu, instance.nics, node)
1094 def _CheckOSVariant(os_obj, name):
1095 """Check whether an OS name conforms to the os variants specification.
1097 @type os_obj: L{objects.OS}
1098 @param os_obj: OS object to check
1100 @param name: OS name passed by the user, to check for validity
1103 if not os_obj.supported_variants:
1105 variant = objects.OS.GetVariant(name)
1107 raise errors.OpPrereqError("OS name must include a variant",
1110 if variant not in os_obj.supported_variants:
1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1114 def _GetNodeInstancesInner(cfg, fn):
1115 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: the cluster configuration object
  @param node_name: the node to query
  @return: list of instance objects having node_name among their nodes

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: the cluster configuration object
  @param node_name: the node to query
  @return: list of instance objects whose primary node is node_name

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: the cluster configuration object
  @param node_name: the node to query
  @return: list of instance objects having node_name among their
      secondary nodes

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
1142 def _GetStorageTypeArgs(cfg, storage_type):
1143 """Returns the arguments for a storage type.
1146 # Special case for file storage
1147 if storage_type == constants.ST_FILE:
1148 # storage.FileStorage wants a list of storage directories
1149 return [[cfg.GetFileStorageDir()]]
1154 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1157 for dev in instance.disks:
1158 cfg.SetDiskID(dev, node_name)
1160 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161 result.Raise("Failed to get disk status from node %s" % node_name,
1162 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164 for idx, bdev_status in enumerate(result.payload):
1165 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1171 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172 """Check the sanity of iallocator and node arguments and use the
1173 cluster-wide iallocator if appropriate.
1175 Check that at most one of (iallocator, node) is specified. If none is
1176 specified, then the LU's opcode's iallocator slot is filled with the
1177 cluster-wide default iallocator.
1179 @type iallocator_slot: string
1180 @param iallocator_slot: the name of the opcode iallocator slot
1181 @type node_slot: string
1182 @param node_slot: the name of the opcode target node slot
1185 node = getattr(lu.op, node_slot, None)
1186 iallocator = getattr(lu.op, iallocator_slot, None)
1188 if node is not None and iallocator is not None:
1189 raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1191 elif node is None and iallocator is None:
1192 default_iallocator = lu.cfg.GetDefaultIAllocator()
1193 if default_iallocator:
1194 setattr(lu.op, iallocator_slot, default_iallocator)
1196 raise errors.OpPrereqError("No iallocator or node given and no"
1197 " cluster-wide default iallocator found."
1198 " Please specify either an iallocator or a"
1199 " node, or set a cluster-wide default"
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    Runs the post-init hooks on the master node only.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do; the LU exists only to trigger the hooks.

    """
    return True
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      # best-effort: hook failures must not abort cluster destruction
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      # keep a copy of the keys Ganeti managed before removing them
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master
1286 def _VerifyCertificate(filename):
1287 """Verifies a certificate for LUVerifyCluster.
1289 @type filename: string
1290 @param filename: Path to PEM file
1294 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1295 utils.ReadFile(filename))
1296 except Exception, err: # pylint: disable-msg=W0703
1297 return (LUVerifyCluster.ETYPE_ERROR,
1298 "Failed to load X509 certificate %s: %s" % (filename, err))
1301 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1302 constants.SSL_CERT_EXPIRATION_ERROR)
1305 fnamemsg = "While verifying %s: %s" % (filename, msg)
1310 return (None, fnamemsg)
1311 elif errcode == utils.CERT_WARNING:
1312 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1313 elif errcode == utils.CERT_ERROR:
1314 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1316 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1319 class LUVerifyCluster(LogicalUnit):
1320 """Verifies the cluster status.
1323 HPATH = "cluster-verify"
1324 HTYPE = constants.HTYPE_CLUSTER
1326 ("skip_checks", _EmptyList,
1327 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1328 ("verbose", False, _TBool),
1329 ("error_codes", False, _TBool),
1330 ("debug_simulate_errors", False, _TBool),
1334 TCLUSTER = "cluster"
1336 TINSTANCE = "instance"
1338 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1339 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1340 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1341 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1342 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1343 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1344 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1345 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1346 ENODEDRBD = (TNODE, "ENODEDRBD")
1347 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1348 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1349 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1350 ENODEHV = (TNODE, "ENODEHV")
1351 ENODELVM = (TNODE, "ENODELVM")
1352 ENODEN1 = (TNODE, "ENODEN1")
1353 ENODENET = (TNODE, "ENODENET")
1354 ENODEOS = (TNODE, "ENODEOS")
1355 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1356 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1357 ENODERPC = (TNODE, "ENODERPC")
1358 ENODESSH = (TNODE, "ENODESSH")
1359 ENODEVERSION = (TNODE, "ENODEVERSION")
1360 ENODESETUP = (TNODE, "ENODESETUP")
1361 ENODETIME = (TNODE, "ENODETIME")
1363 ETYPE_FIELD = "code"
1364 ETYPE_ERROR = "ERROR"
1365 ETYPE_WARNING = "WARNING"
1367 class NodeImage(object):
1368 """A class representing the logical and physical status of a node.
1371 @ivar name: the node name to which this object refers
1372 @ivar volumes: a structure as returned from
1373 L{ganeti.backend.GetVolumeList} (runtime)
1374 @ivar instances: a list of running instances (runtime)
1375 @ivar pinst: list of configured primary instances (config)
1376 @ivar sinst: list of configured secondary instances (config)
1377 @ivar sbp: diction of {secondary-node: list of instances} of all peers
1378 of this node (config)
1379 @ivar mfree: free memory, as reported by hypervisor (runtime)
1380 @ivar dfree: free disk, as reported by the node (runtime)
1381 @ivar offline: the offline status (config)
1382 @type rpc_fail: boolean
1383 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1384 not whether the individual keys were correct) (runtime)
1385 @type lvm_fail: boolean
1386 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1387 @type hyp_fail: boolean
1388 @ivar hyp_fail: whether the RPC call didn't return the instance list
1389 @type ghost: boolean
1390 @ivar ghost: whether this is a known node or not (config)
1391 @type os_fail: boolean
1392 @ivar os_fail: whether the RPC call didn't return valid OS data
1394 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1397 def __init__(self, offline=False, name=None):
1406 self.offline = offline
1407 self.rpc_fail = False
1408 self.lvm_fail = False
1409 self.hyp_fail = False
1411 self.os_fail = False
1414 def ExpandNames(self):
1415 self.needed_locks = {
1416 locking.LEVEL_NODE: locking.ALL_SET,
1417 locking.LEVEL_INSTANCE: locking.ALL_SET,
1419 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1421 def _Error(self, ecode, item, msg, *args, **kwargs):
1422 """Format an error message.
1424 Based on the opcode's error_codes parameter, either format a
1425 parseable error code, or a simpler error string.
1427 This must be called only from Exec and functions called from Exec.
1430 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1432 # first complete the msg
1435 # then format the whole message
1436 if self.op.error_codes:
1437 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1443 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1444 # and finally report it via the feedback_fn
1445 self._feedback_fn(" - %s" % msg)
1447 def _ErrorIf(self, cond, *args, **kwargs):
1448 """Log an error message if the passed condition is True.
1451 cond = bool(cond) or self.op.debug_simulate_errors
1453 self._Error(*args, **kwargs)
1454 # do not mark the operation as failed for WARN cases only
1455 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1456 self.bad = self.bad or cond
1458 def _VerifyNode(self, ninfo, nresult):
1459 """Perform some basic validation on data returned from a node.
1461 - check the result data structure is well formed and has all the
1463 - check ganeti version
1465 @type ninfo: L{objects.Node}
1466 @param ninfo: the node to check
1467 @param nresult: the results from the node
1469 @return: whether overall this call was successful (and we can expect
1470 reasonable values in the respose)
1474 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1476 # main result, nresult should be a non-empty dict
1477 test = not nresult or not isinstance(nresult, dict)
1478 _ErrorIf(test, self.ENODERPC, node,
1479 "unable to verify node: no data returned")
1483 # compares ganeti version
1484 local_version = constants.PROTOCOL_VERSION
1485 remote_version = nresult.get("version", None)
1486 test = not (remote_version and
1487 isinstance(remote_version, (list, tuple)) and
1488 len(remote_version) == 2)
1489 _ErrorIf(test, self.ENODERPC, node,
1490 "connection to node returned invalid data")
1494 test = local_version != remote_version[0]
1495 _ErrorIf(test, self.ENODEVERSION, node,
1496 "incompatible protocol versions: master %s,"
1497 " node %s", local_version, remote_version[0])
1501 # node seems compatible, we can actually try to look into its results
1503 # full package version
1504 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1505 self.ENODEVERSION, node,
1506 "software version mismatch: master %s, node %s",
1507 constants.RELEASE_VERSION, remote_version[1],
1508 code=self.ETYPE_WARNING)
1510 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1511 if isinstance(hyp_result, dict):
1512 for hv_name, hv_result in hyp_result.iteritems():
1513 test = hv_result is not None
1514 _ErrorIf(test, self.ENODEHV, node,
1515 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1518 test = nresult.get(constants.NV_NODESETUP,
1519 ["Missing NODESETUP results"])
1520 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1525 def _VerifyNodeTime(self, ninfo, nresult,
1526 nvinfo_starttime, nvinfo_endtime):
1527 """Check the node time.
1529 @type ninfo: L{objects.Node}
1530 @param ninfo: the node to check
1531 @param nresult: the remote results for the node
1532 @param nvinfo_starttime: the start time of the RPC call
1533 @param nvinfo_endtime: the end time of the RPC call
1537 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1539 ntime = nresult.get(constants.NV_TIME, None)
1541 ntime_merged = utils.MergeTime(ntime)
1542 except (ValueError, TypeError):
1543 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1546 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1547 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1548 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1549 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1553 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1554 "Node time diverges by at least %s from master node time",
1557 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1558 """Check the node time.
1560 @type ninfo: L{objects.Node}
1561 @param ninfo: the node to check
1562 @param nresult: the remote results for the node
1563 @param vg_name: the configured VG name
1570 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1572 # checks vg existence and size > 20G
1573 vglist = nresult.get(constants.NV_VGLIST, None)
1575 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1577 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1578 constants.MIN_VG_SIZE)
1579 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1582 pvlist = nresult.get(constants.NV_PVLIST, None)
1583 test = pvlist is None
1584 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1586 # check that ':' is not present in PV names, since it's a
1587 # special character for lvcreate (denotes the range of PEs to
1589 for _, pvname, owner_vg in pvlist:
1590 test = ":" in pvname
1591 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1592 " '%s' of VG '%s'", pvname, owner_vg)
1594 def _VerifyNodeNetwork(self, ninfo, nresult):
1595 """Check the node time.
1597 @type ninfo: L{objects.Node}
1598 @param ninfo: the node to check
1599 @param nresult: the remote results for the node
1603 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1605 test = constants.NV_NODELIST not in nresult
1606 _ErrorIf(test, self.ENODESSH, node,
1607 "node hasn't returned node ssh connectivity data")
1609 if nresult[constants.NV_NODELIST]:
1610 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1611 _ErrorIf(True, self.ENODESSH, node,
1612 "ssh communication with node '%s': %s", a_node, a_msg)
1614 test = constants.NV_NODENETTEST not in nresult
1615 _ErrorIf(test, self.ENODENET, node,
1616 "node hasn't returned node tcp connectivity data")
1618 if nresult[constants.NV_NODENETTEST]:
1619 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1621 _ErrorIf(True, self.ENODENET, node,
1622 "tcp communication with node '%s': %s",
1623 anode, nresult[constants.NV_NODENETTEST][anode])
1625 test = constants.NV_MASTERIP not in nresult
1626 _ErrorIf(test, self.ENODENET, node,
1627 "node hasn't returned node master IP reachability data")
1629 if not nresult[constants.NV_MASTERIP]:
1630 if node == self.master_node:
1631 msg = "the master node cannot reach the master IP (not configured?)"
1633 msg = "cannot reach the master IP"
1634 _ErrorIf(True, self.ENODENET, node, msg)
1637 def _VerifyInstance(self, instance, instanceconfig, node_image):
1638 """Verify an instance.
1640 This function checks to see if the required block devices are
1641 available on the instance's node.
1644 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1645 node_current = instanceconfig.primary_node
1647 node_vol_should = {}
1648 instanceconfig.MapLVsByNode(node_vol_should)
1650 for node in node_vol_should:
1651 n_img = node_image[node]
1652 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1653 # ignore missing volumes on offline or broken nodes
1655 for volume in node_vol_should[node]:
1656 test = volume not in n_img.volumes
1657 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1658 "volume %s missing on node %s", volume, node)
1660 if instanceconfig.admin_up:
1661 pri_img = node_image[node_current]
1662 test = instance not in pri_img.instances and not pri_img.offline
1663 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1664 "instance not running on its primary node %s",
1667 for node, n_img in node_image.items():
1668 if (not node == node_current):
1669 test = instance in n_img.instances
1670 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1671 "instance should not run on node %s", node)
1673 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1674 """Verify if there are any unknown volumes in the cluster.
1676 The .os, .swap and backup volumes are ignored. All other volumes are
1677 reported as unknown.
1679 @type reserved: L{ganeti.utils.FieldSet}
1680 @param reserved: a FieldSet of reserved volume names
1683 for node, n_img in node_image.items():
1684 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1685 # skip non-healthy nodes
1687 for volume in n_img.volumes:
1688 test = ((node not in node_vol_should or
1689 volume not in node_vol_should[node]) and
1690 not reserved.Matches(volume))
1691 self._ErrorIf(test, self.ENODEORPHANLV, node,
1692 "volume %s is unknown", volume)
1694 def _VerifyOrphanInstances(self, instancelist, node_image):
1695 """Verify the list of running instances.
1697 This checks what instances are running but unknown to the cluster.
1700 for node, n_img in node_image.items():
1701 for o_inst in n_img.instances:
1702 test = o_inst not in instancelist
1703 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1704 "instance %s on node %s should not exist", o_inst, node)
1706 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1707 """Verify N+1 Memory Resilience.
1709 Check that if one single node dies we can still start all the
1710 instances it was primary for.
1713 for node, n_img in node_image.items():
1714 # This code checks that every node which is now listed as
1715 # secondary has enough memory to host all instances it is
1716 # supposed to should a single other node in the cluster fail.
1717 # FIXME: not ready for failover to an arbitrary node
1718 # FIXME: does not support file-backed instances
1719 # WARNING: we currently take into account down instances as well
1720 # as up ones, considering that even if they're down someone
1721 # might want to start them even in the event of a node failure.
1722 for prinode, instances in n_img.sbp.items():
1724 for instance in instances:
1725 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1726 if bep[constants.BE_AUTO_BALANCE]:
1727 needed_mem += bep[constants.BE_MEMORY]
1728 test = n_img.mfree < needed_mem
1729 self._ErrorIf(test, self.ENODEN1, node,
1730 "not enough memory on to accommodate"
1731 " failovers should peer node %s fail", prinode)
1733 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1735 """Verifies and computes the node required file checksums.
1737 @type ninfo: L{objects.Node}
1738 @param ninfo: the node to check
1739 @param nresult: the remote results for the node
1740 @param file_list: required list of files
1741 @param local_cksum: dictionary of local files and their checksums
1742 @param master_files: list of files that only masters should have
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1749 test = not isinstance(remote_cksum, dict)
1750 _ErrorIf(test, self.ENODEFILECHECK, node,
1751 "node hasn't returned file checksum data")
1755 for file_name in file_list:
1756 node_is_mc = ninfo.master_candidate
1757 must_have = (file_name not in master_files) or node_is_mc
1759 test1 = file_name not in remote_cksum
1761 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1763 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1764 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1765 "file '%s' missing", file_name)
1766 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1767 "file '%s' has wrong checksum", file_name)
1768 # not candidate and this is not a must-have file
1769 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1770 "file '%s' should not exist on non master"
1771 " candidates (and the file is outdated)", file_name)
1772 # all good, except non-master/non-must have combination
1773 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1774 "file '%s' should not exist"
1775 " on non master candidates", file_name)
1777 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1779 """Verifies and the node DRBD status.
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the remote results for the node
1784 @param instanceinfo: the dict of instances
1785 @param drbd_helper: the configured DRBD usermode helper
1786 @param drbd_map: the DRBD map as returned by
1787 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1791 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1794 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1795 test = (helper_result == None)
1796 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1797 "no drbd usermode helper returned")
1799 status, payload = helper_result
1801 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1802 "drbd usermode helper check unsuccessful: %s", payload)
1803 test = status and (payload != drbd_helper)
1804 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1805 "wrong drbd usermode helper: %s", payload)
1807 # compute the DRBD minors
1809 for minor, instance in drbd_map[node].items():
1810 test = instance not in instanceinfo
1811 _ErrorIf(test, self.ECLUSTERCFG, None,
1812 "ghost instance '%s' in temporary DRBD map", instance)
1813 # ghost instance should not be running, but otherwise we
1814 # don't give double warnings (both ghost instance and
1815 # unallocated minor in use)
1817 node_drbd[minor] = (instance, False)
1819 instance = instanceinfo[instance]
1820 node_drbd[minor] = (instance.name, instance.admin_up)
1822 # and now check them
1823 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1824 test = not isinstance(used_minors, (tuple, list))
1825 _ErrorIf(test, self.ENODEDRBD, node,
1826 "cannot parse drbd status file: %s", str(used_minors))
1828 # we cannot check drbd status
1831 for minor, (iname, must_exist) in node_drbd.items():
1832 test = minor not in used_minors and must_exist
1833 _ErrorIf(test, self.ENODEDRBD, node,
1834 "drbd minor %d of instance %s is not active", minor, iname)
1835 for minor in used_minors:
1836 test = minor not in node_drbd
1837 _ErrorIf(test, self.ENODEDRBD, node,
1838 "unallocated drbd minor %d is in use", minor)
1840 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1841 """Builds the node OS structures.
1843 @type ninfo: L{objects.Node}
1844 @param ninfo: the node to check
1845 @param nresult: the remote results for the node
1846 @param nimg: the node image object
1850 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1852 remote_os = nresult.get(constants.NV_OSLIST, None)
1853 test = (not isinstance(remote_os, list) or
1854 not compat.all(isinstance(v, list) and len(v) == 7
1855 for v in remote_os))
1857 _ErrorIf(test, self.ENODEOS, node,
1858 "node hasn't returned valid OS data")
1867 for (name, os_path, status, diagnose,
1868 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1870 if name not in os_dict:
1873 # parameters is a list of lists instead of list of tuples due to
1874 # JSON lacking a real tuple type, fix it:
1875 parameters = [tuple(v) for v in parameters]
1876 os_dict[name].append((os_path, status, diagnose,
1877 set(variants), set(parameters), set(api_ver)))
1879 nimg.oslist = os_dict
1881 def _VerifyNodeOS(self, ninfo, nimg, base):
1882 """Verifies the node OS list.
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nimg: the node image object
1887 @param base: the 'template' node we match against (e.g. from the master)
1891 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1893 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1895 for os_name, os_data in nimg.oslist.items():
1896 assert os_data, "Empty OS status for OS %s?!" % os_name
1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898 _ErrorIf(not f_status, self.ENODEOS, node,
1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901 "OS '%s' has multiple entries (first one shadows the rest): %s",
1902 os_name, utils.CommaJoin([v[0] for v in os_data]))
1903 # this will catched in backend too
1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905 and not f_var, self.ENODEOS, node,
1906 "OS %s with API at least %d does not declare any variant",
1907 os_name, constants.OS_API_V15)
1908 # comparisons with the 'base' image
1909 test = os_name not in base.oslist
1910 _ErrorIf(test, self.ENODEOS, node,
1911 "Extra OS %s not present on reference node (%s)",
1915 assert base.oslist[os_name], "Base node has empty OS status?"
1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1918 # base OS is invalid, skipping
1920 for kind, a, b in [("API version", f_api, b_api),
1921 ("variants list", f_var, b_var),
1922 ("parameters", f_param, b_param)]:
1923 _ErrorIf(a != b, self.ENODEOS, node,
1924 "OS %s %s differs from reference node %s: %s vs. %s",
1925 kind, os_name, base.name,
1926 utils.CommaJoin(a), utils.CommaJoin(b))
1928 # check any missing OSes
1929 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1930 _ErrorIf(missing, self.ENODEOS, node,
1931 "OSes present on reference node %s but missing on this node: %s",
1932 base.name, utils.CommaJoin(missing))
1934 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1935 """Verifies and updates the node volume data.
1937 This function will update a L{NodeImage}'s internal structures
1938 with data from the remote call.
1940 @type ninfo: L{objects.Node}
1941 @param ninfo: the node to check
1942 @param nresult: the remote results for the node
1943 @param nimg: the node image object
1944 @param vg_name: the configured VG name
1948 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1950 nimg.lvm_fail = True
1951 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1954 elif isinstance(lvdata, basestring):
1955 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1956 utils.SafeEncode(lvdata))
1957 elif not isinstance(lvdata, dict):
1958 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1960 nimg.volumes = lvdata
1961 nimg.lvm_fail = False
1963 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1964 """Verifies and updates the node instance list.
1966 If the listing was successful, then updates this node's instance
1967 list. Otherwise, it marks the RPC call as failed for the instance
1970 @type ninfo: L{objects.Node}
1971 @param ninfo: the node to check
1972 @param nresult: the remote results for the node
1973 @param nimg: the node image object
1976 idata = nresult.get(constants.NV_INSTANCELIST, None)
1977 test = not isinstance(idata, list)
1978 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1979 " (instancelist): %s", utils.SafeEncode(str(idata)))
1981 nimg.hyp_fail = True
1983 nimg.instances = idata
1985 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1986 """Verifies and computes a node information map
1988 @type ninfo: L{objects.Node}
1989 @param ninfo: the node to check
1990 @param nresult: the remote results for the node
1991 @param nimg: the node image object
1992 @param vg_name: the configured VG name
1996 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1998 # try to read free memory (from the hypervisor)
1999 hv_info = nresult.get(constants.NV_HVINFO, None)
2000 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2001 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2004 nimg.mfree = int(hv_info["memory_free"])
2005 except (ValueError, TypeError):
2006 _ErrorIf(True, self.ENODERPC, node,
2007 "node returned invalid nodeinfo, check hypervisor")
2009 # FIXME: devise a free space model for file based instances as well
2010 if vg_name is not None:
2011 test = (constants.NV_VGLIST not in nresult or
2012 vg_name not in nresult[constants.NV_VGLIST])
2013 _ErrorIf(test, self.ENODELVM, node,
2014 "node didn't return data for the volume group '%s'"
2015 " - it is either missing or broken", vg_name)
2018 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2019 except (ValueError, TypeError):
2020 _ErrorIf(True, self.ENODERPC, node,
2021 "node returned invalid LVM info, check LVM status")
2023 def BuildHooksEnv(self):
2026 Cluster-Verify hooks just ran in the post phase and their failure makes
2027 the output be logged in the verify output and the verification to fail.
2030 all_nodes = self.cfg.GetNodeList()
2032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2034 for node in self.cfg.GetAllNodesInfo().values():
2035 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2037 return env, [], all_nodes
2039 def Exec(self, feedback_fn):
2040 """Verify integrity of cluster, performing various test on nodes.
2044 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2045 verbose = self.op.verbose
2046 self._feedback_fn = feedback_fn
2047 feedback_fn("* Verifying global settings")
2048 for msg in self.cfg.VerifyConfig():
2049 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2051 # Check the cluster certificates
2052 for cert_filename in constants.ALL_CERT_FILES:
2053 (errcode, msg) = _VerifyCertificate(cert_filename)
2054 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2056 vg_name = self.cfg.GetVGName()
2057 drbd_helper = self.cfg.GetDRBDHelper()
2058 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2059 cluster = self.cfg.GetClusterInfo()
2060 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2061 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2062 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2063 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2064 for iname in instancelist)
2065 i_non_redundant = [] # Non redundant instances
2066 i_non_a_balanced = [] # Non auto-balanced instances
2067 n_offline = 0 # Count of offline nodes
2068 n_drained = 0 # Count of nodes being drained
2069 node_vol_should = {}
2071 # FIXME: verify OS list
2072 # do local checksums
2073 master_files = [constants.CLUSTER_CONF_FILE]
2074 master_node = self.master_node = self.cfg.GetMasterNode()
2075 master_ip = self.cfg.GetMasterIP()
2077 file_names = ssconf.SimpleStore().GetFileList()
2078 file_names.extend(constants.ALL_CERT_FILES)
2079 file_names.extend(master_files)
2080 if cluster.modify_etc_hosts:
2081 file_names.append(constants.ETC_HOSTS)
2083 local_checksums = utils.FingerprintFiles(file_names)
2085 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2086 node_verify_param = {
2087 constants.NV_FILELIST: file_names,
2088 constants.NV_NODELIST: [node.name for node in nodeinfo
2089 if not node.offline],
2090 constants.NV_HYPERVISOR: hypervisors,
2091 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2092 node.secondary_ip) for node in nodeinfo
2093 if not node.offline],
2094 constants.NV_INSTANCELIST: hypervisors,
2095 constants.NV_VERSION: None,
2096 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2097 constants.NV_NODESETUP: None,
2098 constants.NV_TIME: None,
2099 constants.NV_MASTERIP: (master_node, master_ip),
2100 constants.NV_OSLIST: None,
2103 if vg_name is not None:
2104 node_verify_param[constants.NV_VGLIST] = None
2105 node_verify_param[constants.NV_LVLIST] = vg_name
2106 node_verify_param[constants.NV_PVLIST] = [vg_name]
2107 node_verify_param[constants.NV_DRBDLIST] = None
2110 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2112 # Build our expected cluster state
2113 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2115 for node in nodeinfo)
2117 for instance in instancelist:
2118 inst_config = instanceinfo[instance]
2120 for nname in inst_config.all_nodes:
2121 if nname not in node_image:
2123 gnode = self.NodeImage(name=nname)
2125 node_image[nname] = gnode
2127 inst_config.MapLVsByNode(node_vol_should)
2129 pnode = inst_config.primary_node
2130 node_image[pnode].pinst.append(instance)
2132 for snode in inst_config.secondary_nodes:
2133 nimg = node_image[snode]
2134 nimg.sinst.append(instance)
2135 if pnode not in nimg.sbp:
2136 nimg.sbp[pnode] = []
2137 nimg.sbp[pnode].append(instance)
2139 # At this point, we have the in-memory data structures complete,
2140 # except for the runtime information, which we'll gather next
2142 # Due to the way our RPC system works, exact response times cannot be
2143 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2144 # time before and after executing the request, we can at least have a time
2146 nvinfo_starttime = time.time()
2147 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2148 self.cfg.GetClusterName())
2149 nvinfo_endtime = time.time()
2151 all_drbd_map = self.cfg.ComputeDRBDMap()
2153 feedback_fn("* Verifying node status")
2157 for node_i in nodeinfo:
2159 nimg = node_image[node]
2163 feedback_fn("* Skipping offline node %s" % (node,))
2167 if node == master_node:
2169 elif node_i.master_candidate:
2170 ntype = "master candidate"
2171 elif node_i.drained:
2177 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2179 msg = all_nvinfo[node].fail_msg
2180 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2182 nimg.rpc_fail = True
2185 nresult = all_nvinfo[node].payload
2187 nimg.call_ok = self._VerifyNode(node_i, nresult)
2188 self._VerifyNodeNetwork(node_i, nresult)
2189 self._VerifyNodeLVM(node_i, nresult, vg_name)
2190 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2192 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2196 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2197 self._UpdateNodeInstances(node_i, nresult, nimg)
2198 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2199 self._UpdateNodeOS(node_i, nresult, nimg)
2200 if not nimg.os_fail:
2201 if refos_img is None:
2203 self._VerifyNodeOS(node_i, nimg, refos_img)
2205 feedback_fn("* Verifying instance status")
2206 for instance in instancelist:
2208 feedback_fn("* Verifying instance %s" % instance)
2209 inst_config = instanceinfo[instance]
2210 self._VerifyInstance(instance, inst_config, node_image)
2211 inst_nodes_offline = []
2213 pnode = inst_config.primary_node
2214 pnode_img = node_image[pnode]
2215 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2216 self.ENODERPC, pnode, "instance %s, connection to"
2217 " primary node failed", instance)
2219 if pnode_img.offline:
2220 inst_nodes_offline.append(pnode)
2222 # If the instance is non-redundant we cannot survive losing its primary
2223 # node, so we are not N+1 compliant. On the other hand we have no disk
2224 # templates with more than one secondary so that situation is not well
2226 # FIXME: does not support file-backed instances
2227 if not inst_config.secondary_nodes:
2228 i_non_redundant.append(instance)
2229 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2230 instance, "instance has multiple secondary nodes: %s",
2231 utils.CommaJoin(inst_config.secondary_nodes),
2232 code=self.ETYPE_WARNING)
2234 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2235 i_non_a_balanced.append(instance)
2237 for snode in inst_config.secondary_nodes:
2238 s_img = node_image[snode]
2239 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2240 "instance %s, connection to secondary node failed", instance)
2243 inst_nodes_offline.append(snode)
2245 # warn that the instance lives on offline nodes
2246 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2247 "instance lives on offline node(s) %s",
2248 utils.CommaJoin(inst_nodes_offline))
2249 # ... or ghost nodes
2250 for node in inst_config.all_nodes:
2251 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2252 "instance lives on ghost node %s", node)
2254 feedback_fn("* Verifying orphan volumes")
2255 reserved = utils.FieldSet(*cluster.reserved_lvs)
2256 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2258 feedback_fn("* Verifying orphan instances")
2259 self._VerifyOrphanInstances(instancelist, node_image)
2261 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2262 feedback_fn("* Verifying N+1 Memory redundancy")
2263 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2265 feedback_fn("* Other Notes")
2267 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2268 % len(i_non_redundant))
2270 if i_non_a_balanced:
2271 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2272 % len(i_non_a_balanced))
2275 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2278 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
# Post-phase hook analysis for cluster verification: reports each node's
# hook results via feedback_fn and folds script failures into the LU result.
# NOTE(review): this listing is elided (source line numbers skip); some
# statements (e.g. the assignment of `msg`) are not visible here.
2282 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2283 """Analyze the post-hooks' result
2285 This method analyses the hook result, handles it, and sends some
2286 nicely-formatted feedback back to the user.
2288 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2289 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2290 @param hooks_results: the results of the multi-node hooks rpc call
2291 @param feedback_fn: function used send feedback back to the caller
2292 @param lu_result: previous Exec result
2293 @return: the new Exec result, based on the previous result
2297 # We only really run POST phase hooks, and are only interested in
2299 if phase == constants.HOOKS_PHASE_POST:
2300 # Used to change hooks' output to proper indentation
2301 indent_re = re.compile('^', re.M)
2302 feedback_fn("* Hooks Results")
2303 assert hooks_results, "invalid result from hooks"
2305 for node_name in hooks_results:
2306 res = hooks_results[node_name]
# An RPC failure only counts as an error when the node is not offline.
2308 test = msg and not res.offline
2309 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2310 "Communication failure in hooks execution: %s", msg)
2311 if res.offline or msg:
2312 # No need to investigate payload if node is offline or gave an error.
2313 # override manually lu_result here as _ErrorIf only
2314 # overrides self.bad
# res.payload holds (script, hook-result-code, output) tuples per node.
2317 for script, hkr, output in res.payload:
2318 test = hkr == constants.HKR_FAIL
2319 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2320 "Script %s failed, output:", script)
# Re-indent the script's output so it nests under the error line above.
2322 output = indent_re.sub(' ', output)
2323 feedback_fn("%s" % output)
# Cluster-wide LV/disk status verification. Returns per-node errors,
# instances needing activate-disks, and missing (node, volume) pairs.
# NOTE(review): elided listing — some loop headers / `continue` lines and
# initializations (e.g. nv_dict) are not visible in this view.
2329 class LUVerifyDisks(NoHooksLU):
2330 """Verifies the cluster disks status.
2335 def ExpandNames(self):
# Shared locks over all nodes and instances: this LU only reads state.
2336 self.needed_locks = {
2337 locking.LEVEL_NODE: locking.ALL_SET,
2338 locking.LEVEL_INSTANCE: locking.ALL_SET,
2340 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2342 def Exec(self, feedback_fn):
2343 """Verify integrity of cluster disks.
2345 @rtype: tuple of three items
2346 @return: a tuple of (dict of node-to-node_error, list of instances
2347 which need activate-disks, dict of instance: (node, volume) for
2351 result = res_nodes, res_instances, res_missing = {}, [], {}
2353 vg_name = self.cfg.GetVGName()
2354 nodes = utils.NiceSort(self.cfg.GetNodeList())
2355 instances = [self.cfg.GetInstanceInfo(name)
2356 for name in self.cfg.GetInstanceList()]
2359 for inst in instances:
# Only running, network-mirrored (DRBD) instances are relevant here.
2361 if (not inst.admin_up or
2362 inst.disk_template not in constants.DTS_NET_MIRROR):
2364 inst.MapLVsByNode(inst_lvs)
2365 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2366 for node, vol_list in inst_lvs.iteritems():
2367 for vol in vol_list:
2368 nv_dict[(node, vol)] = inst
2373 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2377 node_res = node_lvs[node]
2378 if node_res.offline:
2380 msg = node_res.fail_msg
# RPC failures are recorded per node, not raised: verification continues.
2382 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2383 res_nodes[node] = msg
2386 lvs = node_res.payload
2387 for lv_name, (_, _, lv_online) in lvs.items():
# pop() removes found volumes; whatever remains in nv_dict is missing.
2388 inst = nv_dict.pop((node, lv_name), None)
2389 if (not lv_online and inst is not None
2390 and inst.name not in res_instances):
2391 res_instances.append(inst.name)
2393 # any leftover items in nv_dict are missing LVs, let's arrange the
2395 for key, inst in nv_dict.iteritems():
2396 if inst.name not in res_missing:
2397 res_missing[inst.name] = []
2398 res_missing[inst.name].append(key)
# Reconciles recorded disk sizes in the config with the actual block-device
# sizes reported by the nodes, fixing mismatches (DRBD8 children included).
# NOTE(review): elided listing — several `else:`/`continue` lines and the
# initialization of per_node_disks/changed are not visible in this view.
2403 class LURepairDiskSizes(NoHooksLU):
2404 """Verifies the cluster disks sizes.
2407 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2410 def ExpandNames(self):
# With an explicit instance list we lock only those instances and later
# their primary nodes; otherwise everything is locked (shared).
2411 if self.op.instances:
2412 self.wanted_names = []
2413 for name in self.op.instances:
2414 full_name = _ExpandInstanceName(self.cfg, name)
2415 self.wanted_names.append(full_name)
2416 self.needed_locks = {
2417 locking.LEVEL_NODE: [],
2418 locking.LEVEL_INSTANCE: self.wanted_names,
2420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2422 self.wanted_names = None
2423 self.needed_locks = {
2424 locking.LEVEL_NODE: locking.ALL_SET,
2425 locking.LEVEL_INSTANCE: locking.ALL_SET,
2427 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2429 def DeclareLocks(self, level):
# Only primary nodes are needed: sizes are queried on the primary.
2430 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2431 self._LockInstancesNodes(primary_only=True)
2433 def CheckPrereq(self):
2434 """Check prerequisites.
2436 This only checks the optional instance list against the existing names.
2439 if self.wanted_names is None:
2440 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2442 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2443 in self.wanted_names]
2445 def _EnsureChildSizes(self, disk):
2446 """Ensure children of the disk have the needed disk size.
2448 This is valid mainly for DRBD8 and fixes an issue where the
2449 children have smaller disk size.
2451 @param disk: an L{ganeti.objects.Disk} object
2454 if disk.dev_type == constants.LD_DRBD8:
2455 assert disk.children, "Empty children for DRBD8?"
2456 fchild = disk.children[0]
2457 mismatch = fchild.size < disk.size
2459 self.LogInfo("Child disk has size %d, parent %d, fixing",
2460 fchild.size, disk.size)
2461 fchild.size = disk.size
2463 # and we recurse on this child only, not on the metadev
2464 return self._EnsureChildSizes(fchild) or mismatch
2468 def Exec(self, feedback_fn):
2469 """Verify the size of cluster disks.
2472 # TODO: check child disks too
2473 # TODO: check differences in size between primary/secondary nodes
# Group (instance, disk-index, disk) triples by primary node so a single
# RPC per node can query all of that node's disks at once.
2475 for instance in self.wanted_instances:
2476 pnode = instance.primary_node
2477 if pnode not in per_node_disks:
2478 per_node_disks[pnode] = []
2479 for idx, disk in enumerate(instance.disks):
2480 per_node_disks[pnode].append((instance, idx, disk))
2483 for node, dskl in per_node_disks.items():
# Work on copies so SetDiskID doesn't mutate the config objects.
2484 newl = [v[2].Copy() for v in dskl]
2486 self.cfg.SetDiskID(dsk, node)
2487 result = self.rpc.call_blockdev_getsizes(node, newl)
# Per-node failures are warnings only; other nodes are still processed.
2489 self.LogWarning("Failure in blockdev_getsizes call to node"
2490 " %s, ignoring", node)
2492 if len(result.data) != len(dskl):
2493 self.LogWarning("Invalid result from node %s, ignoring node results",
2496 for ((instance, idx, disk), size) in zip(dskl, result.data):
2498 self.LogWarning("Disk %d of instance %s did not return size"
2499 " information, ignoring", idx, instance.name)
2501 if not isinstance(size, (int, long)):
2502 self.LogWarning("Disk %d of instance %s did not return valid"
2503 " size information, ignoring", idx, instance.name)
2506 if size != disk.size:
2507 self.LogInfo("Disk %d of instance %s has mismatched size,"
2508 " correcting: recorded %d, actual %d", idx,
2509 instance.name, disk.size, size)
2511 self.cfg.Update(instance, feedback_fn)
2512 changed.append((instance.name, idx, size))
2513 if self._EnsureChildSizes(disk):
2514 self.cfg.Update(instance, feedback_fn)
2515 changed.append((instance.name, idx, disk.size))
# Renames the cluster (name and/or master IP): stops the master role,
# updates the config, redistributes known_hosts, restarts the master role.
# NOTE(review): elided listing — the try/finally structure around the
# master-role stop/start and some assignments (e.g. `ip`) are not visible.
2519 class LURenameCluster(LogicalUnit):
2520 """Rename the cluster.
2523 HPATH = "cluster-rename"
2524 HTYPE = constants.HTYPE_CLUSTER
2525 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2527 def BuildHooksEnv(self):
2532 "OP_TARGET": self.cfg.GetClusterName(),
2533 "NEW_NAME": self.op.name,
2535 mn = self.cfg.GetMasterNode()
2536 all_nodes = self.cfg.GetNodeList()
2537 return env, [mn], all_nodes
2539 def CheckPrereq(self):
2540 """Verify that the passed name is a valid one.
2543 hostname = netutils.GetHostInfo(self.op.name)
2545 new_name = hostname.name
2546 self.ip = new_ip = hostname.ip
2547 old_name = self.cfg.GetClusterName()
2548 old_ip = self.cfg.GetMasterIP()
# Reject a no-op rename: at least one of name/IP must change.
2549 if new_name == old_name and new_ip == old_ip:
2550 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2551 " cluster has changed",
# A new master IP must not already be in use on the network.
2553 if new_ip != old_ip:
2554 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2555 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2556 " reachable on the network. Aborting." %
2557 new_ip, errors.ECODE_NOTUNIQUE)
2559 self.op.name = new_name
2561 def Exec(self, feedback_fn):
2562 """Rename the cluster.
2565 clustername = self.op.name
2568 # shutdown the master IP
2569 master = self.cfg.GetMasterNode()
2570 result = self.rpc.call_node_stop_master(master, False)
2571 result.Raise("Could not disable the master role")
2574 cluster = self.cfg.GetClusterInfo()
2575 cluster.cluster_name = clustername
2576 cluster.master_ip = ip
2577 self.cfg.Update(cluster, feedback_fn)
2579 # update the known hosts file
2580 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2581 node_list = self.cfg.GetNodeList()
2583 node_list.remove(master)
# Push the regenerated known_hosts to all other nodes; failures are
# only warnings since the rename itself has already been committed.
2586 result = self.rpc.call_upload_file(node_list,
2587 constants.SSH_KNOWN_HOSTS_FILE)
2588 for to_node, to_result in result.iteritems():
2589 msg = to_result.fail_msg
2591 msg = ("Copy of file %s to node %s failed: %s" %
2592 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2593 self.proc.LogWarning(msg)
2596 result = self.rpc.call_node_start_master(master, False, False)
2597 msg = result.fail_msg
2599 self.LogWarning("Could not re-enable the master role on"
2600 " the master, please restart manually: %s", msg)
# Modifies cluster-wide parameters (VG name, hypervisors and their params,
# be/nic/os params, uid pool, candidate pool size, etc.). CheckPrereq
# validates everything against the live nodes; Exec commits to the config.
# NOTE(review): elided listing — try/except lines, `else:` branches, list
# initializations (e.g. nic_errors) and some dict closings are not visible.
2605 class LUSetClusterParams(LogicalUnit):
2606 """Change the parameters of the cluster.
2609 HPATH = "cluster-modify"
2610 HTYPE = constants.HTYPE_CLUSTER
# Opcode parameter declarations: (name, default, type-check). A None
# default means "parameter not given, leave the current value alone".
2612 ("vg_name", None, _TMaybeString),
2613 ("enabled_hypervisors", None,
2614 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2615 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2616 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2617 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2618 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2619 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2620 ("uid_pool", None, _NoType),
2621 ("add_uids", None, _NoType),
2622 ("remove_uids", None, _NoType),
2623 ("maintain_node_health", None, _TMaybeBool),
2624 ("nicparams", None, _TOr(_TDict, _TNone)),
2625 ("drbd_helper", None, _TOr(_TString, _TNone)),
2626 ("default_iallocator", None, _TMaybeString),
2627 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2628 ("hidden_oss", None, _TOr(_TListOf(\
2631 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2633 ("blacklisted_oss", None, _TOr(_TListOf(\
2636 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2641 def CheckArguments(self):
# Validate the uid-pool range syntax for all three uid-pool operations.
2645 if self.op.uid_pool:
2646 uidpool.CheckUidPool(self.op.uid_pool)
2648 if self.op.add_uids:
2649 uidpool.CheckUidPool(self.op.add_uids)
2651 if self.op.remove_uids:
2652 uidpool.CheckUidPool(self.op.remove_uids)
2654 def ExpandNames(self):
2655 # FIXME: in the future maybe other cluster params won't require checking on
2656 # all nodes to be modified.
2657 self.needed_locks = {
2658 locking.LEVEL_NODE: locking.ALL_SET,
2660 self.share_locks[locking.LEVEL_NODE] = 1
2662 def BuildHooksEnv(self):
2667 "OP_TARGET": self.cfg.GetClusterName(),
2668 "NEW_VG_NAME": self.op.vg_name,
2670 mn = self.cfg.GetMasterNode()
2671 return env, [mn], [mn]
2673 def CheckPrereq(self):
2674 """Check prerequisites.
2676 This checks whether the given params don't conflict and
2677 if the given volume group is valid.
# An explicitly empty vg_name means "disable LVM" — only allowed when no
# LVM-backed disks exist anywhere in the cluster.
2680 if self.op.vg_name is not None and not self.op.vg_name:
2681 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2682 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2683 " instances exist", errors.ECODE_INVAL)
2685 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2686 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2687 raise errors.OpPrereqError("Cannot disable drbd helper while"
2688 " drbd-based instances exist",
2691 node_list = self.acquired_locks[locking.LEVEL_NODE]
2693 # if vg_name not None, checks given volume group on all nodes
2695 vglist = self.rpc.call_vg_list(node_list)
2696 for node in node_list:
2697 msg = vglist[node].fail_msg
2699 # ignoring down node
2700 self.LogWarning("Error while gathering data on node %s"
2701 " (ignoring node): %s", node, msg)
2703 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2705 constants.MIN_VG_SIZE)
2707 raise errors.OpPrereqError("Error on node '%s': %s" %
2708 (node, vgstatus), errors.ECODE_ENVIRON)
2710 if self.op.drbd_helper:
2711 # checks given drbd helper on all nodes
2712 helpers = self.rpc.call_drbd_helper(node_list)
2713 for node in node_list:
2714 ninfo = self.cfg.GetNodeInfo(node)
2716 self.LogInfo("Not checking drbd helper on offline node %s", node)
2718 msg = helpers[node].fail_msg
2720 raise errors.OpPrereqError("Error checking drbd helper on node"
2721 " '%s': %s" % (node, msg),
2722 errors.ECODE_ENVIRON)
2723 node_helper = helpers[node].payload
2724 if node_helper != self.op.drbd_helper:
2725 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2726 (node, node_helper), errors.ECODE_ENVIRON)
2728 self.cluster = cluster = self.cfg.GetClusterInfo()
2729 # validate params changes
2730 if self.op.beparams:
2731 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2732 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2734 if self.op.nicparams:
2735 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2736 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2737 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2740 # check all instances for consistency
2741 for instance in self.cfg.GetAllInstancesInfo().values():
2742 for nic_idx, nic in enumerate(instance.nics):
# Fill each NIC's own params on top of the proposed defaults and check
# that the effective result would still be valid for every instance.
2743 params_copy = copy.deepcopy(nic.nicparams)
2744 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2746 # check parameter syntax
2748 objects.NIC.CheckParameterSyntax(params_filled)
2749 except errors.ConfigurationError, err:
2750 nic_errors.append("Instance %s, nic/%d: %s" %
2751 (instance.name, nic_idx, err))
2753 # if we're moving instances to routed, check that they have an ip
2754 target_mode = params_filled[constants.NIC_MODE]
2755 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2756 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2757 (instance.name, nic_idx))
2759 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2760 "\n".join(nic_errors))
2762 # hypervisor list/parameters
2763 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2764 if self.op.hvparams:
2765 for hv_name, hv_dict in self.op.hvparams.items():
2766 if hv_name not in self.new_hvparams:
2767 self.new_hvparams[hv_name] = hv_dict
2769 self.new_hvparams[hv_name].update(hv_dict)
2771 # os hypervisor parameters
2772 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2774 for os_name, hvs in self.op.os_hvp.items():
2775 if os_name not in self.new_os_hvp:
2776 self.new_os_hvp[os_name] = hvs
2778 for hv_name, hv_dict in hvs.items():
2779 if hv_name not in self.new_os_hvp[os_name]:
2780 self.new_os_hvp[os_name][hv_name] = hv_dict
2782 self.new_os_hvp[os_name][hv_name].update(hv_dict)
# Per-OS parameters; an entry emptied by the update is dropped entirely.
2785 self.new_osp = objects.FillDict(cluster.osparams, {})
2786 if self.op.osparams:
2787 for os_name, osp in self.op.osparams.items():
2788 if os_name not in self.new_osp:
2789 self.new_osp[os_name] = {}
2791 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2794 if not self.new_osp[os_name]:
2795 # we removed all parameters
2796 del self.new_osp[os_name]
2798 # check the parameter validity (remote check)
2799 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2800 os_name, self.new_osp[os_name])
2802 # changes to the hypervisor list
2803 if self.op.enabled_hypervisors is not None:
2804 self.hv_list = self.op.enabled_hypervisors
2805 for hv in self.hv_list:
2806 # if the hypervisor doesn't already exist in the cluster
2807 # hvparams, we initialize it to empty, and then (in both
2808 # cases) we make sure to fill the defaults, as we might not
2809 # have a complete defaults list if the hypervisor wasn't
2811 if hv not in new_hvp:
2813 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2814 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2816 self.hv_list = cluster.enabled_hypervisors
2818 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2819 # either the enabled list has changed, or the parameters have, validate
2820 for hv_name, hv_params in self.new_hvparams.items():
2821 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2822 (self.op.enabled_hypervisors and
2823 hv_name in self.op.enabled_hypervisors)):
2824 # either this is a new hypervisor, or its parameters have changed
2825 hv_class = hypervisor.GetHypervisor(hv_name)
2826 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827 hv_class.CheckParameterSyntax(hv_params)
2828 _CheckHVParams(self, node_list, hv_name, hv_params)
2831 # no need to check any newly-enabled hypervisors, since the
2832 # defaults have already been checked in the above code-block
2833 for os_name, os_hvp in self.new_os_hvp.items():
2834 for hv_name, hv_params in os_hvp.items():
2835 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2836 # we need to fill in the new os_hvp on top of the actual hv_p
2837 cluster_defaults = self.new_hvparams.get(hv_name, {})
2838 new_osp = objects.FillDict(cluster_defaults, hv_params)
2839 hv_class = hypervisor.GetHypervisor(hv_name)
2840 hv_class.CheckParameterSyntax(new_osp)
2841 _CheckHVParams(self, node_list, hv_name, new_osp)
2843 if self.op.default_iallocator:
2844 alloc_script = utils.FindFile(self.op.default_iallocator,
2845 constants.IALLOCATOR_SEARCH_PATH,
2847 if alloc_script is None:
2848 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2849 " specified" % self.op.default_iallocator,
2852 def Exec(self, feedback_fn):
2853 """Change the parameters of the cluster.
# Each optional parameter is applied only when it was actually supplied;
# validation already happened in CheckPrereq.
2856 if self.op.vg_name is not None:
2857 new_volume = self.op.vg_name
2860 if new_volume != self.cfg.GetVGName():
2861 self.cfg.SetVGName(new_volume)
2863 feedback_fn("Cluster LVM configuration already in desired"
2864 " state, not changing")
2865 if self.op.drbd_helper is not None:
2866 new_helper = self.op.drbd_helper
2869 if new_helper != self.cfg.GetDRBDHelper():
2870 self.cfg.SetDRBDHelper(new_helper)
2872 feedback_fn("Cluster DRBD helper already in desired state,"
2874 if self.op.hvparams:
2875 self.cluster.hvparams = self.new_hvparams
2877 self.cluster.os_hvp = self.new_os_hvp
2878 if self.op.enabled_hypervisors is not None:
2879 self.cluster.hvparams = self.new_hvparams
2880 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2881 if self.op.beparams:
2882 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2883 if self.op.nicparams:
2884 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2885 if self.op.osparams:
2886 self.cluster.osparams = self.new_osp
2888 if self.op.candidate_pool_size is not None:
2889 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2890 # we need to update the pool size here, otherwise the save will fail
2891 _AdjustCandidatePool(self, [])
2893 if self.op.maintain_node_health is not None:
2894 self.cluster.maintain_node_health = self.op.maintain_node_health
2896 if self.op.add_uids is not None:
2897 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2899 if self.op.remove_uids is not None:
2900 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2902 if self.op.uid_pool is not None:
2903 self.cluster.uid_pool = self.op.uid_pool
2905 if self.op.default_iallocator is not None:
2906 self.cluster.default_iallocator = self.op.default_iallocator
2908 if self.op.reserved_lvs is not None:
2909 self.cluster.reserved_lvs = self.op.reserved_lvs
# Applies add/remove modifications to an OS-name list attribute
# (hidden_oss / blacklisted_oss), with idempotency feedback.
2911 def helper_oss(aname, mods, desc):
2912 lst = getattr(self.cluster, aname)
2913 for key, val in mods:
2914 if key == constants.DDM_ADD:
2916 feedback_fn("OS %s already in %s, ignoring", val, desc)
2919 elif key == constants.DDM_REMOVE:
2923 feedback_fn("OS %s not found in %s, ignoring", val, desc)
2925 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2927 if self.op.hidden_oss:
2928 helper_oss("hidden_oss", self.op.hidden_oss,
2931 if self.op.blacklisted_oss:
2932 helper_oss("blacklisted_oss", self.op.blacklisted_oss,
2933 "blacklisted OS list")
# Single config write at the end commits all accumulated changes.
2935 self.cfg.Update(self.cluster, feedback_fn)
# Copies non-config ancillary files (hosts, certs, HMAC keys, hypervisor
# files) to all online nodes; upload failures are logged, not raised.
2938 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2939 """Distribute additional files which are part of the cluster configuration.
2941 ConfigWriter takes care of distributing the config and ssconf files, but
2942 there are more files which should be distributed to all nodes. This function
2943 makes sure those are copied.
2945 @param lu: calling logical unit
2946 @param additional_nodes: list of nodes not in the config to distribute to
2949 # 1. Gather target nodes
# The master already has the files; exclude it from the target list.
2950 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2951 dist_nodes = lu.cfg.GetOnlineNodeList()
2952 if additional_nodes is not None:
2953 dist_nodes.extend(additional_nodes)
2954 if myself.name in dist_nodes:
2955 dist_nodes.remove(myself.name)
2957 # 2. Gather files to distribute
2958 dist_files = set([constants.ETC_HOSTS,
2959 constants.SSH_KNOWN_HOSTS_FILE,
2960 constants.RAPI_CERT_FILE,
2961 constants.RAPI_USERS_FILE,
2962 constants.CONFD_HMAC_KEY,
2963 constants.CLUSTER_DOMAIN_SECRET_FILE,
# Each enabled hypervisor may contribute extra files of its own.
2966 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2967 for hv_name in enabled_hypervisors:
2968 hv_class = hypervisor.GetHypervisor(hv_name)
2969 dist_files.update(hv_class.GetAncillaryFiles())
2971 # 3. Perform the files upload
2972 for fname in dist_files:
# Optional files (e.g. RAPI users) may legitimately be absent locally.
2973 if os.path.exists(fname):
2974 result = lu.rpc.call_upload_file(dist_nodes, fname)
2975 for to_node, to_result in result.items():
2976 msg = to_result.fail_msg
2978 msg = ("Copy of file %s to node %s failed: %s" %
2979 (fname, to_node, msg))
2980 lu.proc.LogWarning(msg)
# Forces a full redistribution of the cluster configuration and ancillary
# files: the Update() call triggers config/ssconf distribution, then the
# helper pushes the remaining files.
2983 class LURedistributeConfig(NoHooksLU):
2984 """Force the redistribution of cluster configuration.
2986 This is a very simple LU.
2991 def ExpandNames(self):
# Shared node locks suffice: only file pushes, no node state changes.
2992 self.needed_locks = {
2993 locking.LEVEL_NODE: locking.ALL_SET,
2995 self.share_locks[locking.LEVEL_NODE] = 1
2997 def Exec(self, feedback_fn):
2998 """Redistribute the configuration.
3001 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3002 _RedistributeAncillaryFiles(self)
# Polls the primary node's mirror status until the instance's disks are in
# sync (or once, if oneshot). Returns True when not degraded at the end.
# NOTE(review): elided listing — the surrounding polling loop header, the
# `done` computation and several branches are not visible in this view.
3005 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3006 """Sleep and poll for an instance's disk to sync.
# Nothing to do for diskless instances or an explicitly empty disk list.
3009 if not instance.disks or disks is not None and not disks:
3012 disks = _ExpandCheckDisks(instance, disks)
3015 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
# Sync status is always queried on the primary node.
3017 node = instance.primary_node
3020 lu.cfg.SetDiskID(dev, node)
3022 # TODO: Convert to utils.Retry
3025 degr_retries = 10 # in seconds, as we sleep 1 second each time
3029 cumul_degraded = False
3030 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3031 msg = rstats.fail_msg
3033 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
# Repeated RPC failures eventually abort the wait entirely.
3036 raise errors.RemoteError("Can't contact node %s for mirror data,"
3037 " aborting." % node)
3040 rstats = rstats.payload
3042 for i, mstat in enumerate(rstats):
3044 lu.LogWarning("Can't compute data for node %s/%s",
3045 node, disks[i].iv_name)
# Degraded with no sync progress reported counts as overall degradation.
3048 cumul_degraded = (cumul_degraded or
3049 (mstat.is_degraded and mstat.sync_percent is None))
3050 if mstat.sync_percent is not None:
3052 if mstat.estimated_time is not None:
3053 rem_time = ("%s remaining (estimated)" %
3054 utils.FormatSeconds(mstat.estimated_time))
3055 max_time = mstat.estimated_time
3057 rem_time = "no time estimate"
3058 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3059 (disks[i].iv_name, mstat.sync_percent, rem_time))
3061 # if we're done but degraded, let's do a few small retries, to
3062 # make sure we see a stable and not transient situation; therefore
3063 # we force restart of the loop
3064 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3065 logging.info("Degraded disks found, %d retries left", degr_retries)
# Sleep interval follows the sync estimate, capped at 60 seconds.
3073 time.sleep(min(60, max_time))
3076 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3077 return not cumul_degraded
# Checks a single disk (and, for non-ldisk mode, its children recursively)
# for degradation on the given node.
# NOTE(review): elided listing — the initialization of `result`, the
# `if msg:`/`if ldisk:` branch lines and the final return are not visible.
3080 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3081 """Check that mirrors are not degraded.
3083 The ldisk parameter, if True, will change the test from the
3084 is_degraded attribute (which represents overall non-ok status for
3085 the device(s)) to the ldisk (representing the local storage status).
3088 lu.cfg.SetDiskID(dev, node)
# Only devices assembled on this node (primary, or secondary-assembling
# types like DRBD) can be queried via blockdev_find.
3092 if on_primary or dev.AssembleOnSecondary():
3093 rstats = lu.rpc.call_blockdev_find(node, dev)
3094 msg = rstats.fail_msg
3096 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3098 elif not rstats.payload:
3099 lu.LogWarning("Can't find disk on node %s", node)
# ldisk mode checks local-storage status; otherwise overall degradation.
3103 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3105 result = result and not rstats.payload.is_degraded
# Child devices are checked with the default (non-ldisk) criterion.
3108 for child in dev.children:
3109 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
# OS diagnose/query LU: collects per-node OS information via RPC, merges
# it per OS name, and renders the requested output fields. Hidden,
# blacklisted or invalid OSes are filtered unless explicitly requested.
# NOTE(review): elided listing — several assignments (e.g. `all_os = {}`,
# `valid = True`, the `val = ...` lines in Exec) are not visible here.
3114 class LUDiagnoseOS(NoHooksLU):
3115 """Logical unit for OS diagnose/query.
3120 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3124 _BLK = "blacklisted"
3126 _FIELDS_STATIC = utils.FieldSet()
3127 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3128 "parameters", "api_versions", _HID, _BLK)
3130 def CheckArguments(self):
# Querying specific OS names is not implemented; only full listing.
3132 raise errors.OpPrereqError("Selective OS query not supported",
3135 _CheckOutputFields(static=self._FIELDS_STATIC,
3136 dynamic=self._FIELDS_DYNAMIC,
3137 selected=self.op.output_fields)
3139 def ExpandNames(self):
3140 # Lock all nodes, in shared mode
3141 # Temporary removal of locks, should be reverted later
3142 # TODO: reintroduce locks when they are lighter-weight
3143 self.needed_locks = {}
3144 #self.share_locks[locking.LEVEL_NODE] = 1
3145 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3148 def _DiagnoseByOS(rlist):
3149 """Remaps a per-node return list into an a per-os per-node dictionary
3151 @param rlist: a map with node names as keys and OS objects as values
3154 @return: a dictionary with osnames as keys and as value another
3155 map, with nodes as keys and tuples of (path, status, diagnose,
3156 variants, parameters, api_versions) as values, eg::
3158 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3159 (/srv/..., False, "invalid api")],
3160 "node2": [(/srv/..., True, "", [], [])]}
3165 # we build here the list of nodes that didn't fail the RPC (at RPC
3166 # level), so that nodes with a non-responding node daemon don't
3167 # make all OSes invalid
3168 good_nodes = [node_name for node_name in rlist
3169 if not rlist[node_name].fail_msg]
3170 for node_name, nr in rlist.items():
3171 if nr.fail_msg or not nr.payload:
3173 for (name, path, status, diagnose, variants,
3174 params, api_versions) in nr.payload:
3175 if name not in all_os:
3176 # build a list of nodes for this os containing empty lists
3177 # for each node in node_list
3179 for nname in good_nodes:
3180 all_os[name][nname] = []
3181 # convert params from [name, help] to (name, help)
3182 params = [tuple(v) for v in params]
3183 all_os[name][node_name].append((path, status, diagnose,
3184 variants, params, api_versions))
3187 def Exec(self, feedback_fn):
3188 """Compute the list of OSes.
3191 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3192 node_data = self.rpc.call_os_diagnose(valid_nodes)
3193 pol = self._DiagnoseByOS(node_data)
3195 cluster = self.cfg.GetClusterInfo()
3197 for os_name in utils.NiceSort(pol.keys()):
3198 os_data = pol[os_name]
# An OS is valid only if its first entry on every node reports status OK;
# variants/params/api-versions are the intersection across all nodes.
3201 (variants, params, api_versions) = null_state = (set(), set(), set())
3202 for idx, osl in enumerate(os_data.values()):
3203 valid = bool(valid and osl and osl[0][1])
3205 (variants, params, api_versions) = null_state
3207 node_variants, node_params, node_api = osl[0][3:6]
3208 if idx == 0: # first entry
3209 variants = set(node_variants)
3210 params = set(node_params)
3211 api_versions = set(node_api)
3212 else: # keep consistency
3213 variants.intersection_update(node_variants)
3214 params.intersection_update(node_params)
3215 api_versions.intersection_update(node_api)
3217 is_hid = os_name in cluster.hidden_oss
3218 is_blk = os_name in cluster.blacklisted_oss
# Skip hidden/blacklisted/invalid OSes unless the corresponding status
# field was explicitly requested by the caller.
3219 if ((self._HID not in self.op.output_fields and is_hid) or
3220 (self._BLK not in self.op.output_fields and is_blk) or
3221 (self._VLD not in self.op.output_fields and not valid)):
3224 for field in self.op.output_fields:
3227 elif field == self._VLD:
3229 elif field == "node_status":
3230 # this is just a copy of the dict
3232 for node_name, nos_list in os_data.items():
3233 val[node_name] = nos_list
3234 elif field == "variants":
3235 val = utils.NiceSort(list(variants))
3236 elif field == "parameters":
3238 elif field == "api_versions":
3239 val = list(api_versions)
3240 elif field == self._HID:
3242 elif field == self._BLK:
3245 raise errors.ParameterError(field)
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False

  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      # only scan all instances if an instance-related field was asked for
      inst_data = self.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          # single-letter role: Master/Candidate/Drained/Offline/Regular
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    # map of instance -> {node: [lv names]} for reverse volume lookup
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # find the instance owning this LV, if any
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                break
            else:
              inst = None
            val = inst and inst.name
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("storage_type", _NoDefault, _CheckStorageType),
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ("name", None, _TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", _NoDefault, _CheckStorageType),
    ("name", _NoDefault, _TNonEmptyString),
    ("changes", _NoDefault, _TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, _NoType),
    ("secondary_ip", None, _TMaybeString),
    ("readd", False, _TBool),
    ]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = netutils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    if self.op.secondary_ip is None:
      # default the secondary ip to the primary (single-homed node)
      self.op.secondary_ip = primary_ip
    if not netutils.IsValidIP4(self.op.secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    secondary_ip = self.op.secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      # on re-add, don't count the node itself against the candidate pool
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, _TMaybeBool),
    ("offline", None, _TMaybeBool),
    ("drained", None, _TMaybeBool),
    ("auto_promote", False, _TBool),
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    # a demotion may require promoting another node, hence locking all
    self.lock_all = self.op.auto_promote and self.might_demote

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      }

    return result
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
        _AssembleInstanceDisks(self, self.instance,
                               ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Thin wrapper over L{_AssembleInstanceDisks} that cleans up and
  aborts the operation if disk assembly failed.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we assemble
  @param force: whether errors on secondary nodes should be ignored
  @raise errors.OpExecError: if the disks cannot be assembled

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # assembly failed: roll back whatever was brought up, then abort
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  # NOTE(review): parameter list reconstructed from sibling LUs — confirm
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    # refuses to act on a running instance; see _SafeShutdownInstanceDisks
    _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we shut down
  @param disks: optional subset of the instance's disks (all if None)

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on
  @raise errors.ProgrammerError: if a disk is not owned by the instance

  """
  if disks is None:
    # None means "all of the instance's disks"
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we shut down
  @param disks: optional subset of disks to act on (all if None)
  @param ignore_primary: whether failures on the primary node count
  @return: True if all shutdowns (that matter) succeeded, False otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)
  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # secondary-node failures only matter when not ignoring the primary
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    # a missing or non-integer value means the node could not report
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      # a missing or non-integer value means the node could not report
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): parameter list header reconstructed — confirm against opcodes
  _OP_PARAMS = [
    _PInstanceName,
    ("force", False, _TBool),
    ("hvparams", _EmptyDict, _TDict),
    ("beparams", _EmptyDict, _TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      # only check memory when we will actually have to start it
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    if msg:
      # start failed: don't leave assembled disks behind
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): parameter list header reconstructed — confirm against opcodes
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, _TBool),
    ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
    ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboot is done entirely on the node
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot: shutdown, re-assemble disks, start again
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): parameter list header reconstructed — confirm against opcodes
  _OP_PARAMS = [
    _PInstanceName,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      # best-effort: the disks are still deactivated below
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): parameter list header reconstructed — confirm against opcodes
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, _TMaybeString),
    ("force_variant", False, _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      # always deactivate the disks, even if the OS create failed
      _ShutdownInstanceDisks(self, inst)
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): parameter list header reconstructed — confirm against opcodes
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", _EmptyList, _TListOf(_TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      # no disks given: act on all of them
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)

    _CreateDisks(self, self.instance, to_skip=to_skip)
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): parameter list header reconstructed — confirm against opcodes
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", _NoDefault, _TNonEmptyString),
    ("ip_check", False, _TBool),
    ("name_check", True, _TBool),
    ]

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    new_name = self.op.new_name
    if self.op.name_check:
      hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
      new_name = self.op.new_name = hostinfo.name
      if (self.op.ip_check and
          netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostinfo.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        # the config rename already happened, so only warn here
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): parameter list header reconstructed — confirm against opcodes
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, _TBool),
    ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    # post hooks also run on the (former) instance nodes
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  Removes the instance's disks and the instance itself from the
  configuration, and schedules removal of its lock.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to report progress to the caller
  @param instance: the instance object to remove
  @param ignore_failures: whether disk-removal failures are fatal
  @raise errors.OpExecError: if the disks cannot be removed and
      C{ignore_failures} is false

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
    ("names", _EmptyList, _TListOf(_TNonEmptyString)),
    ("use_locking", False, _TBool),
    ]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
                                   "oper_ram",
                                   "oper_vcpus",
                                   "status")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    # locking is only needed when live (dynamic) data was requested
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "oper_vcpus":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("vcpus", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based items
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
5418 class LUFailoverInstance(LogicalUnit):
5419 """Failover an instance.
5422 HPATH = "instance-failover"
5423 HTYPE = constants.HTYPE_INSTANCE
5426 ("ignore_consistency", False, _TBool),
5431 def ExpandNames(self):
5432 self._ExpandAndLockInstance()
5433 self.needed_locks[locking.LEVEL_NODE] = []
5434 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5436 def DeclareLocks(self, level):
5437 if level == locking.LEVEL_NODE:
5438 self._LockInstancesNodes()
5440 def BuildHooksEnv(self):
5443 This runs on master, primary and secondary nodes of the instance.
5446 instance = self.instance
5447 source_node = instance.primary_node
5448 target_node = instance.secondary_nodes[0]
5450 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5451 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5452 "OLD_PRIMARY": source_node,
5453 "OLD_SECONDARY": target_node,
5454 "NEW_PRIMARY": target_node,
5455 "NEW_SECONDARY": source_node,
5457 env.update(_BuildInstanceHookEnvByObject(self, instance))
5458 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5460 nl_post.append(source_node)
5461 return env, nl, nl_post
5463 def CheckPrereq(self):
5464 """Check prerequisites.
5466 This checks that the instance is in the cluster.
5469 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5470 assert self.instance is not None, \
5471 "Cannot retrieve locked instance %s" % self.op.instance_name
5473 bep = self.cfg.GetClusterInfo().FillBE(instance)
5474 if instance.disk_template not in constants.DTS_NET_MIRROR:
5475 raise errors.OpPrereqError("Instance's disk layout is not"
5476 " network mirrored, cannot failover.",
5479 secondary_nodes = instance.secondary_nodes
5480 if not secondary_nodes:
5481 raise errors.ProgrammerError("no secondary node but using "
5482 "a mirrored disk template")
5484 target_node = secondary_nodes[0]
5485 _CheckNodeOnline(self, target_node)
5486 _CheckNodeNotDrained(self, target_node)
5487 if instance.admin_up:
5488 # check memory requirements on the secondary node
5489 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5490 instance.name, bep[constants.BE_MEMORY],
5491 instance.hypervisor)
5493 self.LogInfo("Not checking memory on the secondary node as"
5494 " instance will not be started")
5496 # check bridge existance
5497 _CheckInstanceBridgesExist(self, instance, node=target_node)
5499 def Exec(self, feedback_fn):
# Failover sequence: verify disk consistency (unless ignored), shut the
# instance down on the primary, deactivate its disks there, flip
# primary_node in the config, then reactivate disks and restart on the
# former secondary if the instance was marked up.
5500 """Failover an instance.
5502 The failover is done by shutting it down on its present node and
5503 starting it on the secondary.
5506 instance = self.instance
5508 source_node = instance.primary_node
5509 target_node = instance.secondary_nodes[0]
5511 if instance.admin_up:
5512 feedback_fn("* checking disk consistency between source and target")
5513 for dev in instance.disks:
5514 # for drbd, these are drbd over lvm
5515 if not _CheckDiskConsistency(self, dev, target_node, False):
5516 if not self.op.ignore_consistency:
5517 raise errors.OpExecError("Disk %s is degraded on target node,"
5518 " aborting failover." % dev.iv_name)
5520 feedback_fn("* not checking disk consistency as instance is not running")
5522 feedback_fn("* shutting down instance on source node")
5523 logging.info("Shutting down instance %s on node %s",
5524 instance.name, source_node)
5526 result = self.rpc.call_instance_shutdown(source_node, instance,
5527 self.op.shutdown_timeout)
5528 msg = result.fail_msg
# NOTE(review): an "if msg:" guard appears elided from this extract;
# the warning/raise below handle a failed shutdown.
5530 if self.op.ignore_consistency:
# With ignore_consistency the failed shutdown is only warned about —
# the operator is trusted to have fenced the source node.
5531 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5532 " Proceeding anyway. Please make sure node"
5533 " %s is down. Error details: %s",
5534 instance.name, source_node, source_node, msg)
5536 raise errors.OpExecError("Could not shutdown instance %s on"
5538 (instance.name, source_node, msg))
5540 feedback_fn("* deactivating the instance's disks on source node")
# ignore_primary=True: the primary may be dead, best-effort there.
5541 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5542 raise errors.OpExecError("Can't shut down the instance's disks.")
5544 instance.primary_node = target_node
5545 # distribute new instance config to the other nodes
5546 self.cfg.Update(instance, feedback_fn)
5548 # Only start the instance if it's marked as up
5549 if instance.admin_up:
5550 feedback_fn("* activating the instance's disks on target node")
5551 logging.info("Starting instance %s on node %s",
5552 instance.name, target_node)
5554 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5555 ignore_secondaries=True)
# If disks could not be assembled, roll back the activation and abort.
5557 _ShutdownInstanceDisks(self, instance)
5558 raise errors.OpExecError("Can't activate the instance's disks")
5560 feedback_fn("* starting the instance on the target node")
5561 result = self.rpc.call_instance_start(target_node, instance, None, None)
5562 msg = result.fail_msg
# On start failure, shut disks down again before raising.
5564 _ShutdownInstanceDisks(self, instance)
5565 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5566 (instance.name, target_node, msg))
5569 class LUMigrateInstance(LogicalUnit):
# Logical unit wrapping TLMigrateInstance: live migration of a single
# instance (no shutdown, unlike LUFailoverInstance). The actual work is
# delegated to the tasklet created in ExpandNames.
5570 """Migrate an instance.
5572 This is migration without shutting down, compared to the failover,
5573 which is done with shutdown.
5576 HPATH = "instance-migrate"
5577 HTYPE = constants.HTYPE_INSTANCE
5582 ("cleanup", False, _TBool),
5587 def ExpandNames(self):
5588 self._ExpandAndLockInstance()
# Node locks are computed later (DeclareLocks) from the instance nodes.
5590 self.needed_locks[locking.LEVEL_NODE] = []
5591 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5593 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5595 self.tasklets = [self._migrater]
5597 def DeclareLocks(self, level):
5598 if level == locking.LEVEL_NODE:
5599 self._LockInstancesNodes()
5601 def BuildHooksEnv(self):
# Build the hook environment; roles are swapped (old primary becomes
# new secondary and vice versa) since migration exchanges the nodes.
5604 This runs on master, primary and secondary nodes of the instance.
5607 instance = self._migrater.instance
5608 source_node = instance.primary_node
5609 target_node = instance.secondary_nodes[0]
5610 env = _BuildInstanceHookEnvByObject(self, instance)
# self._migrater.live is only valid after the tasklet's CheckPrereq ran.
5611 env["MIGRATE_LIVE"] = self._migrater.live
5612 env["MIGRATE_CLEANUP"] = self.op.cleanup
5614 "OLD_PRIMARY": source_node,
5615 "OLD_SECONDARY": target_node,
5616 "NEW_PRIMARY": target_node,
5617 "NEW_SECONDARY": source_node,
5619 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
# Post hooks additionally run on the source node.
5621 nl_post.append(source_node)
5622 return env, nl, nl_post
5625 class LUMoveInstance(LogicalUnit):
# Move an instance to an arbitrary node by shutting it down, creating
# new disks on the target, copying the data block-device-by-block-device
# (slow path, works for LV/file disks only), then starting it there.
5626 """Move an instance by data-copying.
5629 HPATH = "instance-move"
5630 HTYPE = constants.HTYPE_INSTANCE
5633 ("target_node", _NoDefault, _TNonEmptyString),
5638 def ExpandNames(self):
5639 self._ExpandAndLockInstance()
5640 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5641 self.op.target_node = target_node
5642 self.needed_locks[locking.LEVEL_NODE] = [target_node]
# APPEND: keep the explicit target-node lock and add the instance nodes.
5643 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5645 def DeclareLocks(self, level):
5646 if level == locking.LEVEL_NODE:
# Only the primary is needed: a move source never involves secondaries.
5647 self._LockInstancesNodes(primary_only=True)
5649 def BuildHooksEnv(self):
# Hook environment: target node and shutdown timeout, plus the standard
# per-instance variables.
5652 This runs on master, primary and secondary nodes of the instance.
5656 "TARGET_NODE": self.op.target_node,
5657 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5659 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5660 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5661 self.op.target_node]
5664 def CheckPrereq(self):
# Prerequisites: instance and target node exist, target differs from the
# current primary, every disk is a simple (LV or file) device, the
# target is online/not drained and — if the instance is up — has enough
# free memory.
5665 """Check prerequisites.
5667 This checks that the instance is in the cluster.
5670 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5671 assert self.instance is not None, \
5672 "Cannot retrieve locked instance %s" % self.op.instance_name
5674 node = self.cfg.GetNodeInfo(self.op.target_node)
5675 assert node is not None, \
5676 "Cannot retrieve locked node %s" % self.op.target_node
5678 self.target_node = target_node = node.name
5680 if target_node == instance.primary_node:
5681 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5682 (instance.name, target_node),
5685 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Only plain logical volumes and file-based disks can be dd-copied.
5687 for idx, dsk in enumerate(instance.disks):
5688 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5689 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5690 " cannot copy" % idx, errors.ECODE_STATE)
5692 _CheckNodeOnline(self, target_node)
5693 _CheckNodeNotDrained(self, target_node)
5695 if instance.admin_up:
5696 # check memory requirements on the secondary node
5697 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5698 instance.name, bep[constants.BE_MEMORY],
5699 instance.hypervisor)
# NOTE(review): an "else:" line appears elided from this extract before
# the following LogInfo call.
5701 self.LogInfo("Not checking memory on the secondary node as"
5702 " instance will not be started")
5704 # check bridge existance
5705 _CheckInstanceBridgesExist(self, instance, node=target_node)
5707 def Exec(self, feedback_fn):
# Move sequence: shutdown on source, create fresh disks on target,
# assemble and copy each disk over, update primary_node in the config,
# remove the old disks and (if marked up) start on the target.
5708 """Move an instance.
5710 The move is done by shutting it down on its present node, copying
5711 the data over (slow) and starting it on the new node.
5714 instance = self.instance
5716 source_node = instance.primary_node
5717 target_node = self.target_node
5719 self.LogInfo("Shutting down instance %s on source node %s",
5720 instance.name, source_node)
5722 result = self.rpc.call_instance_shutdown(source_node, instance,
5723 self.op.shutdown_timeout)
5724 msg = result.fail_msg
5726 if self.op.ignore_consistency:
# Best-effort shutdown when ignore_consistency is set; the operator
# must ensure the source node really is down.
5727 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5728 " Proceeding anyway. Please make sure node"
5729 " %s is down. Error details: %s",
5730 instance.name, source_node, source_node, msg)
5732 raise errors.OpExecError("Could not shutdown instance %s on"
5734 (instance.name, source_node, msg))
5736 # create the target disks
5738 _CreateDisks(self, instance, target_node=target_node)
5739 except errors.OpExecError:
# Roll back partial disk creation and release DRBD minors before
# re-raising.
5740 self.LogWarning("Device creation failed, reverting...")
5742 _RemoveDisks(self, instance, target_node=target_node)
5744 self.cfg.ReleaseDRBDMinors(instance.name)
5747 cluster_name = self.cfg.GetClusterInfo().cluster_name
5750 # activate, get path, copy the data over
5751 for idx, disk in enumerate(instance.disks):
5752 self.LogInfo("Copying data for disk %d", idx)
5753 result = self.rpc.call_blockdev_assemble(target_node, disk,
5754 instance.name, True)
# Failures are accumulated in "errs" so all disks are attempted.
5756 self.LogWarning("Can't assemble newly created disk %d: %s",
5757 idx, result.fail_msg)
5758 errs.append(result.fail_msg)
5760 dev_path = result.payload
5761 result = self.rpc.call_blockdev_export(source_node, disk,
5762 target_node, dev_path,
5765 self.LogWarning("Can't copy data over for disk %d: %s",
5766 idx, result.fail_msg)
5767 errs.append(result.fail_msg)
5771 self.LogWarning("Some disks failed to copy, aborting")
# On any copy error, drop the half-created target disks and minors.
5773 _RemoveDisks(self, instance, target_node=target_node)
5775 self.cfg.ReleaseDRBDMinors(instance.name)
5776 raise errors.OpExecError("Errors during disk copy: %s" %
5779 instance.primary_node = target_node
5780 self.cfg.Update(instance, feedback_fn)
5782 self.LogInfo("Removing the disks on the original node")
5783 _RemoveDisks(self, instance, target_node=source_node)
5785 # Only start the instance if it's marked as up
5786 if instance.admin_up:
5787 self.LogInfo("Starting instance %s on node %s",
5788 instance.name, target_node)
5790 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5791 ignore_secondaries=True)
5793 _ShutdownInstanceDisks(self, instance)
5794 raise errors.OpExecError("Can't activate the instance's disks")
5796 result = self.rpc.call_instance_start(target_node, instance, None, None)
5797 msg = result.fail_msg
# On start failure, shut disks down again before raising.
5799 _ShutdownInstanceDisks(self, instance)
5800 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5801 (instance.name, target_node, msg))
5804 class LUMigrateNode(LogicalUnit):
# Evacuate a node by migrating away every instance that has this node
# as its primary; one TLMigrateInstance tasklet is created per instance.
5805 """Migrate all instances from a node.
5808 HPATH = "node-migrate"
5809 HTYPE = constants.HTYPE_NODE
5817 def ExpandNames(self):
5818 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5820 self.needed_locks = {
5821 locking.LEVEL_NODE: [self.op.node_name],
5824 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5826 # Create tasklets for migrating instances for all instances on this node
5830 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5831 logging.debug("Migrating instance %s", inst.name)
5832 names.append(inst.name)
# cleanup=False: a real migration, not post-failure cleanup.
5834 tasklets.append(TLMigrateInstance(self, inst.name, False))
5836 self.tasklets = tasklets
5838 # Declare instance locks
5839 self.needed_locks[locking.LEVEL_INSTANCE] = names
5841 def DeclareLocks(self, level):
5842 if level == locking.LEVEL_NODE:
5843 self._LockInstancesNodes()
5845 def BuildHooksEnv(self):
# Node-level hooks only run on the master node.
5848 This runs on the master, the primary and all the secondaries.
5852 "NODE_NAME": self.op.node_name,
5855 nl = [self.cfg.GetMasterNode()]
5857 return (env, nl, nl)
5860 class TLMigrateInstance(Tasklet):
# Tasklet implementing instance migration over DRBD8: it drives the
# disks through secondary / standalone / dual-master states, invokes the
# hypervisor migration RPCs, and can also clean up after a previously
# failed migration (cleanup mode).
5861 """Tasklet class for instance migration.
5864 @ivar live: whether the migration will be done live or non-live;
5865 this variable is initalized only after CheckPrereq has run
5868 def __init__(self, lu, instance_name, cleanup):
5869 """Initializes this class.
5872 Tasklet.__init__(self, lu)
5875 self.instance_name = instance_name
5876 self.cleanup = cleanup
# "live" is recomputed in CheckPrereq from op.live/op.mode.
5877 self.live = False # will be overridden later
5879 def CheckPrereq(self):
# Prerequisites: DRBD8 template with a secondary, enough memory on the
# target, working bridges, and (for real migrations) a non-drained
# target plus hypervisor agreement that the instance is migratable.
# Also resolves the live/non-live mode from op.live / op.mode.
5880 """Check prerequisites.
5882 This checks that the instance is in the cluster.
5885 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5886 instance = self.cfg.GetInstanceInfo(instance_name)
5887 assert instance is not None
5889 if instance.disk_template != constants.DT_DRBD8:
5890 raise errors.OpPrereqError("Instance's disk layout is not"
5891 " drbd8, cannot migrate.", errors.ECODE_STATE)
5893 secondary_nodes = instance.secondary_nodes
5894 if not secondary_nodes:
5895 raise errors.ConfigurationError("No secondary node but using"
5896 " drbd8 disk template")
5898 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5900 target_node = secondary_nodes[0]
5901 # check memory requirements on the secondary node
5902 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5903 instance.name, i_be[constants.BE_MEMORY],
5904 instance.hypervisor)
5906 # check bridge existance
5907 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5909 if not self.cleanup:
5910 _CheckNodeNotDrained(self.lu, target_node)
5911 result = self.rpc.call_instance_migratable(instance.primary_node,
5913 result.Raise("Can't migrate, please use failover",
5914 prereq=True, ecode=errors.ECODE_STATE)
5916 self.instance = instance
# "live" and "mode" are mutually exclusive ways to request the
# migration type; "live" is translated into a mode and then cleared.
5918 if self.lu.op.live is not None and self.lu.op.mode is not None:
5919 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5920 " parameters are accepted",
5922 if self.lu.op.live is not None:
5924 self.lu.op.mode = constants.HT_MIGRATION_LIVE
5926 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5927 # reset the 'live' parameter to None so that repeated
5928 # invocations of CheckPrereq do not raise an exception
5929 self.lu.op.live = None
5930 elif self.lu.op.mode is None:
5931 # read the default value from the hypervisor
5932 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5933 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5935 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
5937 def _WaitUntilSync(self):
# Poll all involved nodes until every DRBD device reports fully synced,
# reporting the minimum completion percentage as progress.
5938 """Poll with custom rpc for disk sync.
5940 This uses our own step-based rpc call.
5943 self.feedback_fn("* wait until resync is done")
5947 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5949 self.instance.disks)
5951 for node, nres in result.items():
5952 nres.Raise("Cannot resync disks on node %s" % node)
5953 node_done, node_percent = nres.payload
5954 all_done = all_done and node_done
5955 if node_percent is not None:
5956 min_percent = min(min_percent, node_percent)
5958 if min_percent < 100:
5959 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5962 def _EnsureSecondary(self, node):
# Demote the DRBD devices on the given node to the secondary role.
5963 """Demote a node to secondary.
5966 self.feedback_fn("* switching node %s to secondary mode" % node)
5968 for dev in self.instance.disks:
5969 self.cfg.SetDiskID(dev, node)
5971 result = self.rpc.call_blockdev_close(node, self.instance.name,
5972 self.instance.disks)
5973 result.Raise("Cannot change disk to secondary on node %s" % node)
5975 def _GoStandalone(self):
# Disconnect the DRBD network link on all involved nodes.
5976 """Disconnect from the network.
5979 self.feedback_fn("* changing into standalone mode")
5980 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5981 self.instance.disks)
5982 for node, nres in result.items():
5983 nres.Raise("Cannot disconnect disks node %s" % node)
5985 def _GoReconnect(self, multimaster):
# Reconnect DRBD on all nodes, in dual-master mode during the actual
# live migration, or single-master otherwise.
5986 """Reconnect to the network.
5992 msg = "single-master"
5993 self.feedback_fn("* changing disks into %s mode" % msg)
5994 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5995 self.instance.disks,
5996 self.instance.name, multimaster)
5997 for node, nres in result.items():
5998 nres.Raise("Cannot change disks config on node %s" % node)
6000 def _ExecCleanup(self):
6001 """Try to cleanup after a failed migration.
6003 The cleanup is done by:
6004 - check that the instance is running only on one node
6005 (and update the config if needed)
6006 - change disks on its secondary node to secondary
6007 - wait until disks are fully synchronized
6008 - disconnect from the network
6009 - change disks into single-master mode
6010 - wait again until disks are fully synchronized
6013 instance = self.instance
6014 target_node = self.target_node
6015 source_node = self.source_node
6017 # check running on only one node
6018 self.feedback_fn("* checking where the instance actually runs"
6019 " (if this hangs, the hypervisor might be in"
6021 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6022 for node, result in ins_l.items():
6023 result.Raise("Can't contact node %s" % node)
6025 runningon_source = instance.name in ins_l[source_node].payload
6026 runningon_target = instance.name in ins_l[target_node].payload
6028 if runningon_source and runningon_target:
# Split-brain at the hypervisor level cannot be resolved automatically.
6029 raise errors.OpExecError("Instance seems to be running on two nodes,"
6030 " or the hypervisor is confused. You will have"
6031 " to ensure manually that it runs only on one"
6032 " and restart this operation.")
6034 if not (runningon_source or runningon_target):
6035 raise errors.OpExecError("Instance does not seem to be running at all."
6036 " In this case, it's safer to repair by"
6037 " running 'gnt-instance stop' to ensure disk"
6038 " shutdown, and then restarting it.")
6040 if runningon_target:
6041 # the migration has actually succeeded, we need to update the config
6042 self.feedback_fn("* instance running on secondary node (%s),"
6043 " updating config" % target_node)
6044 instance.primary_node = target_node
6045 self.cfg.Update(instance, self.feedback_fn)
6046 demoted_node = source_node
6048 self.feedback_fn("* instance confirmed to be running on its"
6049 " primary node (%s)" % source_node)
6050 demoted_node = target_node
6052 self._EnsureSecondary(demoted_node)
6054 self._WaitUntilSync()
6055 except errors.OpExecError:
6056 # we ignore here errors, since if the device is standalone, it
6057 # won't be able to sync
6059 self._GoStandalone()
6060 self._GoReconnect(False)
6061 self._WaitUntilSync()
6063 self.feedback_fn("* done")
6065 def _RevertDiskStatus(self):
# Best-effort rollback of the DRBD state after a failed migration:
# demote the target, disconnect, reconnect single-master, resync.
6066 """Try to revert the disk status after a failed migration.
6069 target_node = self.target_node
6071 self._EnsureSecondary(target_node)
6072 self._GoStandalone()
6073 self._GoReconnect(False)
6074 self._WaitUntilSync()
6075 except errors.OpExecError, err:
# Rollback failures are only logged; the caller raises the original
# migration error.
6076 self.lu.LogWarning("Migration failed and I can't reconnect the"
6077 " drives: error '%s'\n"
6078 "Please look and recover the instance status" %
6081 def _AbortMigration(self):
# Tell the target hypervisor to abort an in-flight migration; failures
# are logged only, since disk rollback must still be attempted.
6082 """Call the hypervisor code to abort a started migration.
6085 instance = self.instance
6086 target_node = self.target_node
6087 migration_info = self.migration_info
6089 abort_result = self.rpc.call_finalize_migration(target_node,
6093 abort_msg = abort_result.fail_msg
6095 logging.error("Aborting migration failed on target node %s: %s",
6096 target_node, abort_msg)
6097 # Don't raise an exception here, as we stil have to try to revert the
6098 # disk status, even if this step failed.
6100 def _ExecMigration(self):
6101 """Migrate an instance.
6103 The migrate is done by:
6104 - change the disks into dual-master mode
6105 - wait until disks are fully synchronized again
6106 - migrate the instance
6107 - change disks on the new secondary node (the old primary) to secondary
6108 - wait until disks are fully synchronized
6109 - change disks into single-master mode
6112 instance = self.instance
6113 target_node = self.target_node
6114 source_node = self.source_node
6116 self.feedback_fn("* checking disk consistency between source and target")
6117 for dev in instance.disks:
6118 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
# Unlike failover, migration never proceeds with degraded disks.
6119 raise errors.OpExecError("Disk %s is degraded or not fully"
6120 " synchronized on target node,"
6121 " aborting migrate." % dev.iv_name)
6123 # First get the migration information from the remote node
6124 result = self.rpc.call_migration_info(source_node, instance)
6125 msg = result.fail_msg
6127 log_err = ("Failed fetching source migration information from %s: %s" %
6129 logging.error(log_err)
6130 raise errors.OpExecError(log_err)
6132 self.migration_info = migration_info = result.payload
6134 # Then switch the disks to master/master mode
6135 self._EnsureSecondary(target_node)
6136 self._GoStandalone()
6137 self._GoReconnect(True)
6138 self._WaitUntilSync()
6140 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6141 result = self.rpc.call_accept_instance(target_node,
6144 self.nodes_ip[target_node])
6146 msg = result.fail_msg
6148 logging.error("Instance pre-migration failed, trying to revert"
6149 " disk status: %s", msg)
6150 self.feedback_fn("Pre-migration failed, aborting")
6151 self._AbortMigration()
6152 self._RevertDiskStatus()
6153 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6154 (instance.name, msg))
6156 self.feedback_fn("* migrating instance to %s" % target_node)
6158 result = self.rpc.call_instance_migrate(source_node, instance,
6159 self.nodes_ip[target_node],
6161 msg = result.fail_msg
6163 logging.error("Instance migration failed, trying to revert"
6164 " disk status: %s", msg)
6165 self.feedback_fn("Migration failed, aborting")
6166 self._AbortMigration()
6167 self._RevertDiskStatus()
6168 raise errors.OpExecError("Could not migrate instance %s: %s" %
6169 (instance.name, msg))
6172 instance.primary_node = target_node
6173 # distribute new instance config to the other nodes
6174 self.cfg.Update(instance, self.feedback_fn)
6176 result = self.rpc.call_finalize_migration(target_node,
6180 msg = result.fail_msg
6182 logging.error("Instance migration succeeded, but finalization failed:"
6184 raise errors.OpExecError("Could not finalize instance migration: %s" %
# Demote the old primary and return the disks to single-master mode.
6187 self._EnsureSecondary(source_node)
6188 self._WaitUntilSync()
6189 self._GoStandalone()
6190 self._GoReconnect(False)
6191 self._WaitUntilSync()
6193 self.feedback_fn("* done")
6195 def Exec(self, feedback_fn):
# Entry point: compute source/target nodes and their secondary IPs,
# then dispatch to cleanup or real migration.
6196 """Perform the migration.
6199 feedback_fn("Migrating instance %s" % self.instance.name)
6201 self.feedback_fn = feedback_fn
6203 self.source_node = self.instance.primary_node
6204 self.target_node = self.instance.secondary_nodes[0]
6205 self.all_nodes = [self.source_node, self.target_node]
# DRBD replication runs over the nodes' secondary IPs.
6207 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6208 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6212 return self._ExecCleanup()
6214 return self._ExecMigration()
6217 def _CreateBlockDev(lu, node, instance, device, force_create,
# Recursively create a block-device tree on a node; "force_create"
# becomes sticky once a device requires creation on secondaries.
6219 """Create a tree of block devices on a given node.
6221 If this device type has to be created on secondaries, create it and
6224 If not, just recurse to children keeping the same 'force' value.
6226 @param lu: the lu on whose behalf we execute
6227 @param node: the node on which to create the device
6228 @type instance: L{objects.Instance}
6229 @param instance: the instance which owns the device
6230 @type device: L{objects.Disk}
6231 @param device: the device to create
6232 @type force_create: boolean
6233 @param force_create: whether to force creation of this device; this
6234 will be change to True whenever we find a device which has
6235 CreateOnSecondary() attribute
6236 @param info: the extra 'metadata' we should attach to the device
6237 (this will be represented as a LVM tag)
6238 @type force_open: boolean
6239 @param force_open: this parameter will be passes to the
6240 L{backend.BlockdevCreate} function where it specifies
6241 whether we run on primary or not, and it affects both
6242 the child assembly and the device own Open() execution
6245 if device.CreateOnSecondary():
# Children are created first (bottom-up) so the parent can assemble.
6249 for child in device.children:
6250 _CreateBlockDev(lu, node, instance, child, force_create,
6253 if not force_create:
6256 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6259 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
# Create exactly one block device (no recursion into children) via the
# blockdev_create RPC, recording the returned physical id if new.
6260 """Create a single block device on a given node.
6262 This will not recurse over children of the device, so they must be
6265 @param lu: the lu on whose behalf we execute
6266 @param node: the node on which to create the device
6267 @type instance: L{objects.Instance}
6268 @param instance: the instance which owns the device
6269 @type device: L{objects.Disk}
6270 @param device: the device to create
6271 @param info: the extra 'metadata' we should attach to the device
6272 (this will be represented as a LVM tag)
6273 @type force_open: boolean
6274 @param force_open: this parameter will be passes to the
6275 L{backend.BlockdevCreate} function where it specifies
6276 whether we run on primary or not, and it affects both
6277 the child assembly and the device own Open() execution
6280 lu.cfg.SetDiskID(device, node)
6281 result = lu.rpc.call_blockdev_create(node, device, device.size,
6282 instance.name, force_open, info)
6283 result.Raise("Can't create block device %s on"
6284 " node %s for instance %s" % (device, node, instance.name))
6285 if device.physical_id is None:
# Remember the backend-assigned physical id for later RPCs.
6286 device.physical_id = result.payload
6289 def _GenerateUniqueNames(lu, exts):
# Produce one cluster-unique name per extension by prefixing each
# extension with a freshly generated unique id.
6290 """Generate a suitable LV name.
6292 This will generate a logical volume name for the given instance.
6297 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6298 results.append("%s%s" % (new_id, val))
6302 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
# Build a DRBD8 disk object backed by two LVs (data + 128MB metadata),
# allocating a DRBD port and shared secret from the cluster config.
6304 """Generate a drbd8 device complete with its children.
6307 port = lu.cfg.AllocatePort()
6308 vgname = lu.cfg.GetVGName()
6309 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6310 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6311 logical_id=(vgname, names[0]))
# Fixed 128 MB metadata volume, matching _ComputeDiskSize's accounting.
6312 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6313 logical_id=(vgname, names[1]))
6314 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6315 logical_id=(primary, secondary, port,
6318 children=[dev_data, dev_meta],
6323 def _GenerateDiskTemplate(lu, template_name,
6324 instance_name, primary_node,
6325 secondary_nodes, disk_info,
6326 file_storage_dir, file_driver,
# Build the full list of Disk objects for an instance according to its
# disk template (diskless / plain LV / DRBD8 / file-backed).
6328 """Generate the entire disk layout for a given template type.
6331 #TODO: compute space requirements
6333 vgname = lu.cfg.GetVGName()
6334 disk_count = len(disk_info)
6336 if template_name == constants.DT_DISKLESS:
6338 elif template_name == constants.DT_PLAIN:
# Plain LVs live only on the primary; secondaries are a config error.
6339 if len(secondary_nodes) != 0:
6340 raise errors.ProgrammerError("Wrong template configuration")
6342 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6343 for i in range(disk_count)])
6344 for idx, disk in enumerate(disk_info):
6345 disk_index = idx + base_index
6346 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6347 logical_id=(vgname, names[idx]),
6348 iv_name="disk/%d" % disk_index,
6350 disks.append(disk_dev)
6351 elif template_name == constants.DT_DRBD8:
6352 if len(secondary_nodes) != 1:
6353 raise errors.ProgrammerError("Wrong template configuration")
6354 remote_node = secondary_nodes[0]
# Two DRBD minors per disk: one on the primary, one on the secondary.
6355 minors = lu.cfg.AllocateDRBDMinor(
6356 [primary_node, remote_node] * len(disk_info), instance_name)
# Each disk needs a data LV and a metadata LV, hence two names per disk.
6359 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6360 for i in range(disk_count)]):
6361 names.append(lv_prefix + "_data")
6362 names.append(lv_prefix + "_meta")
6363 for idx, disk in enumerate(disk_info):
6364 disk_index = idx + base_index
6365 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6366 disk["size"], names[idx*2:idx*2+2],
6367 "disk/%d" % disk_index,
6368 minors[idx*2], minors[idx*2+1])
6369 disk_dev.mode = disk["mode"]
6370 disks.append(disk_dev)
6371 elif template_name == constants.DT_FILE:
6372 if len(secondary_nodes) != 0:
6373 raise errors.ProgrammerError("Wrong template configuration")
6375 _RequireFileStorage()
6377 for idx, disk in enumerate(disk_info):
6378 disk_index = idx + base_index
6379 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6380 iv_name="disk/%d" % disk_index,
6381 logical_id=(file_driver,
6382 "%s/disk%d" % (file_storage_dir,
6385 disks.append(disk_dev)
6387 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6391 def _GetInstanceInfoText(instance):
# The string is stored as an LVM tag so volumes can be traced back to
# their owning instance.
6392 """Compute that text that should be added to the disk's metadata.
6395 return "originstname+%s" % instance.name
6398 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6399 """Create all disks for an instance.
6401 This abstracts away some work from AddInstance.
6403 @type lu: L{LogicalUnit}
6404 @param lu: the logical unit on whose behalf we execute
6405 @type instance: L{objects.Instance}
6406 @param instance: the instance whose disks we should create
6408 @param to_skip: list of indices to skip
6409 @type target_node: string
6410 @param target_node: if passed, overrides the target node for creation
6412 @return: the success of the creation
6415 info = _GetInstanceInfoText(instance)
6416 if target_node is None:
6417 pnode = instance.primary_node
6418 all_nodes = instance.all_nodes
# NOTE(review): the "else:" branch setting pnode/all_nodes from
# target_node appears elided from this extract.
6423 if instance.disk_template == constants.DT_FILE:
# File-backed disks additionally need their storage directory created.
6424 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6425 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6427 result.Raise("Failed to create directory '%s' on"
6428 " node %s" % (file_storage_dir, pnode))
6430 # Note: this needs to be kept in sync with adding of disks in
6431 # LUSetInstanceParams
6432 for idx, device in enumerate(instance.disks):
6433 if to_skip and idx in to_skip:
6435 logging.info("Creating volume %s for instance %s",
6436 device.iv_name, instance.name)
6438 for node in all_nodes:
# Only the primary node forces creation/opening of the device tree.
6439 f_create = node == pnode
6440 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6443 def _RemoveDisks(lu, instance, target_node=None):
6444 """Remove all disks for an instance.
6446 This abstracts away some work from `AddInstance()` and
6447 `RemoveInstance()`. Note that in case some of the devices couldn't
6448 be removed, the removal will continue with the other ones (compare
6449 with `_CreateDisks()`).
6451 @type lu: L{LogicalUnit}
6452 @param lu: the logical unit on whose behalf we execute
6453 @type instance: L{objects.Instance}
6454 @param instance: the instance whose disks we should remove
6455 @type target_node: string
6456 @param target_node: used to override the node on which to remove the disks
6458 @return: the success of the removal
6461 logging.info("Removing block devices for instance %s", instance.name)
6464 for device in instance.disks:
# With an explicit target node only that node is touched; otherwise the
# whole (node, disk) tree rooted at the primary is walked.
6466 edata = [(target_node, device)]
6468 edata = device.ComputeNodeTree(instance.primary_node)
6469 for node, disk in edata:
6470 lu.cfg.SetDiskID(disk, node)
6471 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
# Removal failures are warned about but do not stop the loop.
6473 lu.LogWarning("Could not remove block device %s on node %s,"
6474 " continuing anyway: %s", device.iv_name, node, msg)
6477 if instance.disk_template == constants.DT_FILE:
6478 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6482 tgt = instance.primary_node
6483 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6485 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6486 file_storage_dir, instance.primary_node, result.fail_msg)
6492 def _ComputeDiskSize(disk_template, disks):
# Map each disk template to its total volume-group space requirement;
# None means the template uses no VG space (diskless, file-backed).
6493 """Compute disk size requirements in the volume group
6496 # Required free disk space as a function of disk and swap space
6498 constants.DT_DISKLESS: None,
6499 constants.DT_PLAIN: sum(d["size"] for d in disks),
6500 # 128 MB are added for drbd metadata for each disk
6501 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6502 constants.DT_FILE: None,
6505 if disk_template not in req_size_dict:
6506 raise errors.ProgrammerError("Disk template '%s' size requirement"
6507 " is unknown" % disk_template)
6509 return req_size_dict[disk_template]
6512 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6513 """Hypervisor parameter validation.
6515 This function abstract the hypervisor parameter validation to be
6516 used in both instance create and instance modify.
6518 @type lu: L{LogicalUnit}
6519 @param lu: the logical unit for which we check
6520 @type nodenames: list
6521 @param nodenames: the list of nodes on which we should check
6522 @type hvname: string
6523 @param hvname: the name of the hypervisor we should use
6524 @type hvparams: dict
6525 @param hvparams: the parameters which we need to check
6526 @raise errors.OpPrereqError: if the parameters are not valid
6529 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6532 for node in nodenames:
# Raise on the first node whose hypervisor rejects the parameters.
6536 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6539 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6540 """OS parameters validation.
6542 @type lu: L{LogicalUnit}
6543 @param lu: the logical unit for which we check
6544 @type required: boolean
6545 @param required: whether the validation should fail if the OS is not
6547 @type nodenames: list
6548 @param nodenames: the list of nodes on which we should check
6549 @type osname: string
6550 @param osname: the name of the hypervisor we should use
6551 @type osparams: dict
6552 @param osparams: the parameters which we need to check
6553 @raise errors.OpPrereqError: if the parameters are not valid
6556 result = lu.rpc.call_os_validate(required, nodenames, osname,
6557 [constants.OS_VALIDATE_PARAMETERS],
6559 for node, nres in result.items():
6560 # we don't check for offline cases since this should be run only
6561 # against the master node and/or an instance's nodes
6562 nres.Raise("OS Parameters validation failed on node %s" % node)
6563 if not nres.payload:
# Missing OS on a node is only informational when "required" is False.
6564 lu.LogInfo("OS %s not found on node %s, validation skipped",
6568 class LUCreateInstance(LogicalUnit):
6569   """Create an instance.
# HPATH/HTYPE identify the hooks directory and target type for the hook
# scripts run around this logical unit.
6572   HPATH = "instance-add"
6573   HTYPE = constants.HTYPE_INSTANCE
# Each entry below is (parameter name, default value, type check); the
# enclosing _OP_PARAMS list opening is on an elided line. _NoDefault
# marks mandatory parameters, None marks truly optional ones.
6576     ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6577     ("start", True, _TBool),
6578     ("wait_for_sync", True, _TBool),
6579     ("ip_check", True, _TBool),
6580     ("name_check", True, _TBool),
6581     ("disks", _NoDefault, _TListOf(_TDict)),
6582     ("nics", _NoDefault, _TListOf(_TDict)),
6583     ("hvparams", _EmptyDict, _TDict),
6584     ("beparams", _EmptyDict, _TDict),
6585     ("osparams", _EmptyDict, _TDict),
6586     ("no_install", None, _TMaybeBool),
6587     ("os_type", None, _TMaybeString),
6588     ("force_variant", False, _TBool),
# The source_* parameters are only meaningful for remote imports.
6589     ("source_handshake", None, _TOr(_TList, _TNone)),
6590     ("source_x509_ca", None, _TMaybeString),
6591     ("source_instance_name", None, _TMaybeString),
# src_node/src_path are only meaningful for (local) imports.
6592     ("src_node", None, _TMaybeString),
6593     ("src_path", None, _TMaybeString),
6594     ("pnode", None, _TMaybeString),
6595     ("snode", None, _TMaybeString),
6596     ("iallocator", None, _TMaybeString),
6597     ("hypervisor", None, _TMaybeString),
6598     ("disk_template", _NoDefault, _CheckDiskTemplate),
6599     ("identify_defaults", False, _TBool),
6600     ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6601     ("file_storage_dir", None, _TMaybeString),
6605 def CheckArguments(self):
# Purely local (no-RPC) validation and normalization of the opcode
# arguments; cross-node checks happen later in CheckPrereq.
6609 # do not require name_check to ease forward/backward compatibility
# Starting an instance that will not be installed makes no sense, so
# silently downgrade `start` and tell the user.
6611 if self.op.no_install and self.op.start:
6612 self.LogInfo("No-installation mode selected, disabling startup")
6613 self.op.start = False
6614 # validate/normalize the instance name
6615 self.op.instance_name = \
6616 netutils.HostInfo.NormalizeName(self.op.instance_name)
6618 if self.op.ip_check and not self.op.name_check:
6619 # TODO: make the ip check more flexible and not depend on the name check
6620 raise errors.OpPrereqError("Cannot do ip check without a name check",
6623 # check nics' parameter names
6624 for nic in self.op.nics:
6625 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6627 # check disks. parameter names and consistent adopt/no-adopt strategy
# Disks must be uniformly adopted or not adopted; mixing the two (or
# adopting under an unsupported template/iallocator/import) is rejected.
6628 has_adopt = has_no_adopt = False
6629 for disk in self.op.disks:
6630 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6635 if has_adopt and has_no_adopt:
6636 raise errors.OpPrereqError("Either all disks are adopted or none is",
6639 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6640 raise errors.OpPrereqError("Disk adoption is not supported for the"
6641 " '%s' disk template" %
6642 self.op.disk_template,
6644 if self.op.iallocator is not None:
6645 raise errors.OpPrereqError("Disk adoption not allowed with an"
6646 " iallocator script", errors.ECODE_INVAL)
6647 if self.op.mode == constants.INSTANCE_IMPORT:
6648 raise errors.OpPrereqError("Disk adoption not allowed for"
6649 " instance import", errors.ECODE_INVAL)
6651 self.adopt_disks = has_adopt
6653 # instance name verification
# When name checking is on, resolve the name and remember the resolved
# IP for the ping test done later in CheckPrereq.
6654 if self.op.name_check:
6655 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6656 self.op.instance_name = self.hostname1.name
6657 # used in CheckPrereq for ip ping check
6658 self.check_ip = self.hostname1.ip
6659 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6660 raise errors.OpPrereqError("Remote imports require names to be checked" %
6663 self.check_ip = None
6665 # file storage checks
6666 if (self.op.file_driver and
6667 not self.op.file_driver in constants.FILE_DRIVER):
6668 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6669 self.op.file_driver, errors.ECODE_INVAL)
# NOTE(review): this rejects absolute paths (isabs == error), i.e. the
# directory is expected to be relative to the cluster file-storage root,
# despite the message wording — confirm against the full source.
6671 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6672 raise errors.OpPrereqError("File storage directory path not absolute",
6675 ### Node/iallocator related checks
# Exactly one of iallocator/pnode may be given (helper enforces it).
6676 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6678 if self.op.pnode is not None:
6679 if self.op.disk_template in constants.DTS_NET_MIRROR:
6680 if self.op.snode is None:
6681 raise errors.OpPrereqError("The networked disk templates need"
6682 " a mirror node", errors.ECODE_INVAL)
6684 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6686 self.op.snode = None
# Cluster domain secret, used to authenticate remote-import handshakes.
6688 self._cds = _GetClusterDomainSecret()
# Mode-specific argument checks follow: import / create / remote import.
6690 if self.op.mode == constants.INSTANCE_IMPORT:
6691 # On import force_variant must be True, because if we forced it at
6692 # initial install, our only chance when importing it back is that it
6694 self.op.force_variant = True
6696 if self.op.no_install:
6697 self.LogInfo("No-installation mode has no effect during import")
6699 elif self.op.mode == constants.INSTANCE_CREATE:
6700 if self.op.os_type is None:
6701 raise errors.OpPrereqError("No guest OS specified",
6703 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_oss:
6704 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6705 " installation" % self.op.os_type,
6707 if self.op.disk_template is None:
6708 raise errors.OpPrereqError("No disk template specified",
6711 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6712 # Check handshake to ensure both clusters have the same domain secret
6713 src_handshake = self.op.source_handshake
6714 if not src_handshake:
6715 raise errors.OpPrereqError("Missing source handshake",
6718 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6721 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6724 # Load and check source CA
6725 self.source_x509_ca_pem = self.op.source_x509_ca
6726 if not self.source_x509_ca_pem:
6727 raise errors.OpPrereqError("Missing source X509 CA",
6731 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
# Python 2 except syntax; OpenSSL parse failures become user errors.
6733 except OpenSSL.crypto.Error, err:
6734 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6735 (err, ), errors.ECODE_INVAL)
6737 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6738 if errcode is not None:
6739 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6742 self.source_x509_ca = cert
6744 src_instance_name = self.op.source_instance_name
6745 if not src_instance_name:
6746 raise errors.OpPrereqError("Missing source instance name",
6749 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6750 self.source_instance_name = netutils.GetHostInfo(norm_name).name
# Fallthrough (else branch on an elided line): unknown creation mode.
6753 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6754 self.op.mode, errors.ECODE_INVAL)
6756 def ExpandNames(self):
6757 """ExpandNames for CreateInstance.
6759 Figure out the right locks for instance creation.
6762 self.needed_locks = {}
6764 instance_name = self.op.instance_name
6765 # this is just a preventive check, but someone might still add this
6766 # instance in the meantime, and creation will fail at lock-add time
6767 if instance_name in self.cfg.GetInstanceList():
6768 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6769 instance_name, errors.ECODE_EXISTS)
# The (not-yet-existing) instance lock is created, not acquired.
6771 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
# With an iallocator we cannot know the target nodes yet, so lock all
# nodes; otherwise lock exactly the requested primary (and secondary).
6773 if self.op.iallocator:
6774 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6776 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6777 nodelist = [self.op.pnode]
6778 if self.op.snode is not None:
6779 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6780 nodelist.append(self.op.snode)
6781 self.needed_locks[locking.LEVEL_NODE] = nodelist
6783 # in case of import lock the source node too
6784 if self.op.mode == constants.INSTANCE_IMPORT:
6785 src_node = self.op.src_node
6786 src_path = self.op.src_path
# Default the export path to the instance name when not given.
6788 if src_path is None:
6789 self.op.src_path = src_path = self.op.instance_name
# Unknown source node: lock everything and search for the export later
# (in _ReadExportInfo); absolute paths require an explicit node.
6791 if src_node is None:
6792 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6793 self.op.src_node = None
6794 if os.path.isabs(src_path):
6795 raise errors.OpPrereqError("Importing an instance from an absolute"
6796 " path requires a source node option.",
6799 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6800 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6801 self.needed_locks[locking.LEVEL_NODE].append(src_node)
# Relative paths are anchored under the cluster export directory.
6802 if not os.path.isabs(src_path):
6803 self.op.src_path = src_path = \
6804 utils.PathJoin(constants.EXPORT_DIR, src_path)
6806 def _RunAllocator(self):
6807 """Run the allocator based on input opcode.
# Builds the IAllocator request from the already-filled be/nic/disk
# data, runs the external script, and writes the chosen node(s) back
# into self.op.pnode/snode as if the user had specified them.
6810 nics = [n.ToDict() for n in self.nics]
6811 ial = IAllocator(self.cfg, self.rpc,
6812 mode=constants.IALLOCATOR_MODE_ALLOC,
6813 name=self.op.instance_name,
6814 disk_template=self.op.disk_template,
6817 vcpus=self.be_full[constants.BE_VCPUS],
6818 mem_size=self.be_full[constants.BE_MEMORY],
6821 hypervisor=self.op.hypervisor,
6824 ial.Run(self.op.iallocator)
# Failure branch (condition on an elided line): allocator unsuccessful.
6827 raise errors.OpPrereqError("Can't compute nodes using"
6828 " iallocator '%s': %s" %
6829 (self.op.iallocator, ial.info),
# Sanity check: the script must return exactly the node count we asked
# for; anything else is treated as an allocator fault.
6831 if len(ial.result) != ial.required_nodes:
6832 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6833 " of nodes (%s), required %s" %
6834 (self.op.iallocator, len(ial.result),
6835 ial.required_nodes), errors.ECODE_FAULT)
6836 self.op.pnode = ial.result[0]
6837 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6838 self.op.instance_name, self.op.iallocator,
6839 utils.CommaJoin(ial.result))
# Two required nodes means a mirrored template: second one is the
# secondary node.
6840 if ial.required_nodes == 2:
6841 self.op.snode = ial.result[1]
6843 def BuildHooksEnv(self):
6846 This runs on master, primary and secondary nodes of the instance.
# env dict opening is elided; ADD_MODE records create/import/remote.
6850 "ADD_MODE": self.op.mode,
# For imports, expose where the export is being read from.
6852 if self.op.mode == constants.INSTANCE_IMPORT:
6853 env["SRC_NODE"] = self.op.src_node
6854 env["SRC_PATH"] = self.op.src_path
6855 env["SRC_IMAGES"] = self.src_images
# Common instance description (shared helper with other instance LUs).
6857 env.update(_BuildInstanceHookEnv(
6858 name=self.op.instance_name,
6859 primary_node=self.op.pnode,
6860 secondary_nodes=self.secondaries,
6861 status=self.op.start,
6862 os_type=self.op.os_type,
6863 memory=self.be_full[constants.BE_MEMORY],
6864 vcpus=self.be_full[constants.BE_VCPUS],
6865 nics=_NICListToTuple(self, self.nics),
6866 disk_template=self.op.disk_template,
6867 disks=[(d["size"], d["mode"]) for d in self.disks],
6870 hypervisor_name=self.op.hypervisor,
# Node list for the hooks: master + primary (+ secondaries, elided).
6873 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6877 def _ReadExportInfo(self):
6878 """Reads the export information from disk.
6880 It will override the opcode source node and path with the actual
6881 information, if these two were not specified before.
6883 @return: the export information
6886 assert self.op.mode == constants.INSTANCE_IMPORT
6888 src_node = self.op.src_node
6889 src_path = self.op.src_path
# No source node given: search every locked node's export list for a
# matching (relative) export name.
6891 if src_node is None:
6892 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6893 exp_list = self.rpc.call_export_list(locked_nodes)
6895 for node in exp_list:
# Skip nodes whose export-list RPC failed (continue on elided line).
6896 if exp_list[node].fail_msg:
6898 if src_path in exp_list[node].payload:
# Found it: fix up the opcode so later steps see concrete values.
6900 self.op.src_node = src_node = node
6901 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6905 raise errors.OpPrereqError("No export found for relative path %s" %
6906 src_path, errors.ECODE_INVAL)
6908 _CheckNodeOnline(self, src_node)
6909 result = self.rpc.call_export_info(src_node, src_path)
6910 result.Raise("No export or invalid export found in dir %s" % src_path)
# The export metadata is an INI-style config serialized as a string.
6912 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6913 if not export_info.has_section(constants.INISECT_EXP):
6914 raise errors.ProgrammerError("Corrupted export config",
6915 errors.ECODE_ENVIRON)
# Only the current export format version is accepted.
6917 ei_version = export_info.get(constants.INISECT_EXP, "version")
6918 if (int(ei_version) != constants.EXPORT_VERSION):
6919 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6920 (ei_version, constants.EXPORT_VERSION),
6921 errors.ECODE_ENVIRON)
6924 def _ReadExportParams(self, einfo):
6925 """Use export parameters as defaults.
6927 In case the opcode doesn't specify (as in override) some instance
6928 parameters, then try to use them from the export information, if
# @type einfo: config-parser-like export info returned by
# _ReadExportInfo; read-only here, while self.op is filled in place.
6932 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
# Disk template: opcode wins; otherwise taken from the export, and it
# is an error if neither specifies one.
6934 if self.op.disk_template is None:
6935 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6936 self.op.disk_template = einfo.get(constants.INISECT_INS,
6939 raise errors.OpPrereqError("No disk template specified and the export"
6940 " is missing the disk_template information",
# Disks: rebuilt from diskN_size entries when the opcode gave none.
6943 if not self.op.disks:
6944 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6946 # TODO: import the disk iv_name too
6947 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6948 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6949 disks.append({"size": disk_sz})
6950 self.op.disks = disks
6952 raise errors.OpPrereqError("No disk info specified and the export"
6953 " is missing the disk information",
# NICs: rebuilt from nicN_<param> entries when the opcode gave none.
6956 if (not self.op.nics and
6957 einfo.has_option(constants.INISECT_INS, "nic_count")):
6959 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6961 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6962 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6967 if (self.op.hypervisor is None and
6968 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6969 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
# hv/be/os parameter dicts: merged key by key, never overriding values
# the user explicitly supplied in the opcode.
6970 if einfo.has_section(constants.INISECT_HYP):
6971 # use the export parameters but do not override the ones
6972 # specified by the user
6973 for name, value in einfo.items(constants.INISECT_HYP):
6974 if name not in self.op.hvparams:
6975 self.op.hvparams[name] = value
6977 if einfo.has_section(constants.INISECT_BEP):
6978 # use the parameters, without overriding
6979 for name, value in einfo.items(constants.INISECT_BEP):
6980 if name not in self.op.beparams:
6981 self.op.beparams[name] = value
6983 # try to read the parameters old style, from the main section
# Backwards compatibility with exports older than the BEP section.
6984 for name in constants.BES_PARAMETERS:
6985 if (name not in self.op.beparams and
6986 einfo.has_option(constants.INISECT_INS, name)):
6987 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6989 if einfo.has_section(constants.INISECT_OSP):
6990 # use the parameters, without overriding
6991 for name, value in einfo.items(constants.INISECT_OSP):
6992 if name not in self.op.osparams:
6993 self.op.osparams[name] = value
6995 def _RevertToDefaults(self, cluster):
6996 """Revert the instance parameters to the default values.
# Used by identify_defaults: any opcode parameter whose value equals
# the cluster default is dropped, so the instance stays linked to the
# (possibly changing) cluster defaults instead of pinning them.
# @param cluster: the cluster configuration object (provides the
#   SimpleFill* helpers that compute effective defaults).
7000 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
# .keys() snapshot allows deleting from the dict while iterating.
7001 for name in self.op.hvparams.keys():
7002 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7003 del self.op.hvparams[name]
7005 be_defs = cluster.SimpleFillBE({})
7006 for name in self.op.beparams.keys():
7007 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7008 del self.op.beparams[name]
7010 nic_defs = cluster.SimpleFillNIC({})
7011 for nic in self.op.nics:
7012 for name in constants.NICS_PARAMETERS:
# Matching NIC values are deleted on an elided line.
7013 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7016 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7017 for name in self.op.osparams.keys():
7018 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7019 del self.op.osparams[name]
7021 def CheckPrereq(self):
7022 """Check prerequisites.
# Full cross-node validation: fills self.nics/self.disks, runs the
# iallocator if requested, and verifies nodes, OS, hypervisor, memory
# and disk space before Exec() makes any changes.
7025 if self.op.mode == constants.INSTANCE_IMPORT:
7026 export_info = self._ReadExportInfo()
7027 self._ReadExportParams(export_info)
# Re-validate the template: imports may have filled it from the export.
7029 _CheckDiskTemplate(self.op.disk_template)
7031 if (not self.cfg.GetVGName() and
7032 self.op.disk_template not in constants.DTS_NOT_LVM):
7033 raise errors.OpPrereqError("Cluster does not support lvm-based"
7034 " instances", errors.ECODE_STATE)
7036 if self.op.hypervisor is None:
7037 self.op.hypervisor = self.cfg.GetHypervisorType()
7039 cluster = self.cfg.GetClusterInfo()
7040 enabled_hvs = cluster.enabled_hypervisors
7041 if self.op.hypervisor not in enabled_hvs:
7042 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7043 " cluster (%s)" % (self.op.hypervisor,
7044 ",".join(enabled_hvs)),
7047 # check hypervisor parameter syntax (locally)
7048 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7049 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7051 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7052 hv_type.CheckParameterSyntax(filled_hvp)
7053 self.hv_full = filled_hvp
7054 # check that we don't specify global parameters on an instance
7055 _CheckGlobalHvParams(self.op.hvparams)
7057 # fill and remember the beparams dict
7058 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7059 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7061 # build os parameters
7062 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7064 # now that hvp/bep are in final format, let's reset to defaults,
7066 if self.op.identify_defaults:
7067 self._RevertToDefaults(cluster)
# --- NIC processing: turn each raw nic dict into an objects.NIC ---
7071 for idx, nic in enumerate(self.op.nics):
7072 nic_mode_req = nic.get("mode", None)
7073 nic_mode = nic_mode_req
7074 if nic_mode is None:
7075 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7077 # in routed mode, for the first nic, the default ip is 'auto'
7078 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7079 default_ip_mode = constants.VALUE_AUTO
7081 default_ip_mode = constants.VALUE_NONE
7083 # ip validity checks
7084 ip = nic.get("ip", default_ip_mode)
# 'none'/None -> no IP (nic_ip set on elided line); 'auto' -> use the
# IP resolved from the instance name; otherwise must be a valid IPv4.
7085 if ip is None or ip.lower() == constants.VALUE_NONE:
7087 elif ip.lower() == constants.VALUE_AUTO:
7088 if not self.op.name_check:
7089 raise errors.OpPrereqError("IP address set to auto but name checks"
7090 " have been skipped. Aborting.",
7092 nic_ip = self.hostname1.ip
7094 if not netutils.IsValidIP4(ip):
7095 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7096 " like a valid IP" % ip,
7100 # TODO: check the ip address for uniqueness
7101 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7102 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7105 # MAC address verification
7106 mac = nic.get("mac", constants.VALUE_AUTO)
7107 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7108 mac = utils.NormalizeAndValidateMac(mac)
# Explicit MACs are reserved now so concurrent jobs cannot take them.
7111 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7112 except errors.ReservationError:
7113 raise errors.OpPrereqError("MAC address %s already in use"
7114 " in cluster" % mac,
7115 errors.ECODE_NOTUNIQUE)
7117 # bridge verification
# 'bridge' is the legacy spelling of 'link'; both together is an error.
7118 bridge = nic.get("bridge", None)
7119 link = nic.get("link", None)
7121 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7122 " at the same time", errors.ECODE_INVAL)
7123 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7124 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7131 nicparams[constants.NIC_MODE] = nic_mode_req
7133 nicparams[constants.NIC_LINK] = link
7135 check_params = cluster.SimpleFillNIC(nicparams)
7136 objects.NIC.CheckParameterSyntax(check_params)
7137 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7139 # disk checks/pre-build
# --- Disk processing: normalize each raw disk dict into self.disks ---
7141 for disk in self.op.disks:
7142 mode = disk.get("mode", constants.DISK_RDWR)
7143 if mode not in constants.DISK_ACCESS_SET:
7144 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7145 mode, errors.ECODE_INVAL)
7146 size = disk.get("size", None)
7148 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
# int() conversion happens on an elided line; bad values are rejected.
7151 except (TypeError, ValueError):
7152 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7154 new_disk = {"size": size, "mode": mode}
7156 new_disk["adopt"] = disk["adopt"]
7157 self.disks.append(new_disk)
7159 if self.op.mode == constants.INSTANCE_IMPORT:
7161 # Check that the new instance doesn't have less disks than the export
7162 instance_disks = len(self.disks)
7163 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7164 if instance_disks < export_disks:
7165 raise errors.OpPrereqError("Not enough disks to import."
7166 " (instance: %d, export: %d)" %
7167 (instance_disks, export_disks),
# Per-disk dump images; False marks disks with no image to restore.
7171 for idx in range(export_disks):
7172 option = 'disk%d_dump' % idx
7173 if export_info.has_option(constants.INISECT_INS, option):
7174 # FIXME: are the old os-es, disk sizes, etc. useful?
7175 export_name = export_info.get(constants.INISECT_INS, option)
7176 image = utils.PathJoin(self.op.src_path, export_name)
7177 disk_images.append(image)
7179 disk_images.append(False)
7181 self.src_images = disk_images
7183 old_name = export_info.get(constants.INISECT_INS, 'name')
7185 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7186 except (TypeError, ValueError), err:
7187 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7188 " an integer: %s" % str(err),
# Same-name re-import: keep the exported MACs for auto-MAC NICs.
7190 if self.op.instance_name == old_name:
7191 for idx, nic in enumerate(self.nics):
7192 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7193 nic_mac_ini = 'nic%d_mac' % idx
7194 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7196 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7198 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7199 if self.op.ip_check:
7200 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7201 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7202 (self.check_ip, self.op.instance_name),
7203 errors.ECODE_NOTUNIQUE)
7205 #### mac address generation
7206 # By generating here the mac address both the allocator and the hooks get
7207 # the real final mac address rather than the 'auto' or 'generate' value.
7208 # There is a race condition between the generation and the instance object
7209 # creation, which means that we know the mac is valid now, but we're not
7210 # sure it will be when we actually add the instance. If things go bad
7211 # adding the instance will abort because of a duplicate mac, and the
7212 # creation job will fail.
7213 for nic in self.nics:
7214 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7215 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
# Node selection via allocator, now that all sizing data is available.
7219 if self.op.iallocator is not None:
7220 self._RunAllocator()
7222 #### node related checks
7224 # check primary node
7225 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7226 assert self.pnode is not None, \
7227 "Cannot retrieve locked node %s" % self.op.pnode
# Offline/drained conditions are tested on elided lines above each
# raise; both states make the node unusable as a primary.
7229 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7230 pnode.name, errors.ECODE_STATE)
7232 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7233 pnode.name, errors.ECODE_STATE)
7235 self.secondaries = []
7237 # mirror node verification
7238 if self.op.disk_template in constants.DTS_NET_MIRROR:
7239 if self.op.snode == pnode.name:
7240 raise errors.OpPrereqError("The secondary node cannot be the"
7241 " primary node.", errors.ECODE_INVAL)
7242 _CheckNodeOnline(self, self.op.snode)
7243 _CheckNodeNotDrained(self, self.op.snode)
7244 self.secondaries.append(self.op.snode)
7246 nodenames = [pnode.name] + self.secondaries
7248 req_size = _ComputeDiskSize(self.op.disk_template,
7251 # Check lv size requirements, if not adopting
7252 if req_size is not None and not self.adopt_disks:
7253 _CheckNodesFreeDisk(self, nodenames, req_size)
7255 if self.adopt_disks: # instead, we must check the adoption data
# Adoption path: the named LVs must exist on the primary, be unique,
# unreserved and not online (i.e. not in use by something else).
7256 all_lvs = set([i["adopt"] for i in self.disks])
7257 if len(all_lvs) != len(self.disks):
7258 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7260 for lv_name in all_lvs:
7262 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7263 except errors.ReservationError:
7264 raise errors.OpPrereqError("LV named %s used by another instance" %
7265 lv_name, errors.ECODE_NOTUNIQUE)
7267 node_lvs = self.rpc.call_lv_list([pnode.name],
7268 self.cfg.GetVGName())[pnode.name]
7269 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7270 node_lvs.payload
7271 delta = all_lvs.difference(node_lvs.keys())
7273 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7274 utils.CommaJoin(delta),
7276 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7278 raise errors.OpPrereqError("Online logical volumes found, cannot"
7279 " adopt: %s" % utils.CommaJoin(online_lvs),
7281 # update the size of disk based on what is found
7282 for dsk in self.disks:
7283 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
# Remote validation of hypervisor/OS parameters and environment.
7285 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7287 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7288 # check OS parameters (remotely)
7289 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7291 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7293 # memory check on primary node
7295 _CheckNodeFreeMemory(self, self.pnode.name,
7296 "creating instance %s" % self.op.instance_name,
7297 self.be_full[constants.BE_MEMORY],
7300 self.dry_run_result = list(nodenames)
7302 def Exec(self, feedback_fn):
7303 """Create and add the instance to the cluster.
# Order matters throughout: disks are created (or adopted LVs renamed)
# first, the instance is added to the config, node locks are released,
# disks sync, the OS is installed/imported, and only then is the
# instance optionally started.
7306 instance = self.op.instance_name
7307 pnode_name = self.pnode.name
7309 ht_kind = self.op.hypervisor
# Some hypervisors (e.g. for console access) need a network port.
7310 if ht_kind in constants.HTS_REQ_PORT:
7311 network_port = self.cfg.AllocatePort()
7315 if constants.ENABLE_FILE_STORAGE:
7316 # this is needed because os.path.join does not accept None arguments
7317 if self.op.file_storage_dir is None:
7318 string_file_storage_dir = ""
7320 string_file_storage_dir = self.op.file_storage_dir
7322 # build the full file storage dir path
7323 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7324 string_file_storage_dir, instance)
7326 file_storage_dir = ""
7328 disks = _GenerateDiskTemplate(self,
7329 self.op.disk_template,
7330 instance, pnode_name,
7334 self.op.file_driver,
7337 iobj = objects.Instance(name=instance, os=self.op.os_type,
7338 primary_node=pnode_name,
7339 nics=self.nics, disks=disks,
7340 disk_template=self.op.disk_template,
7342 network_port=network_port,
7343 beparams=self.op.beparams,
7344 hvparams=self.op.hvparams,
7345 hypervisor=self.op.hypervisor,
7346 osparams=self.op.osparams,
7349 if self.adopt_disks:
7350 # rename LVs to the newly-generated names; we need to construct
7351 # 'fake' LV disks with the old data, plus the new unique_id
7352 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7354 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7355 rename_to.append(t_dsk.logical_id)
7356 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7357 self.cfg.SetDiskID(t_dsk, pnode_name)
7358 result = self.rpc.call_blockdev_rename(pnode_name,
7359 zip(tmp_disks, rename_to))
7360 result.Raise("Failed to rename adoped LVs")
# Non-adoption path (else on an elided line): create fresh disks, and
# roll back (remove disks, release DRBD minors) if creation fails.
7362 feedback_fn("* creating instance disks...")
7364 _CreateDisks(self, iobj)
7365 except errors.OpExecError:
7366 self.LogWarning("Device creation failed, reverting...")
7368 _RemoveDisks(self, iobj)
7370 self.cfg.ReleaseDRBDMinors(instance)
7373 feedback_fn("adding instance %s to cluster config" % instance)
7375 self.cfg.AddInstance(iobj, self.proc.GetECId())
7377 # Declare that we don't want to remove the instance lock anymore, as we've
7378 # added the instance to the config
7379 del self.remove_locks[locking.LEVEL_INSTANCE]
7380 # Unlock all the nodes
# For imports the source node lock must be kept for the data transfer;
# every other node lock can be dropped early.
7381 if self.op.mode == constants.INSTANCE_IMPORT:
7382 nodes_keep = [self.op.src_node]
7383 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7384 if node != self.op.src_node]
7385 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7386 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7388 self.context.glm.release(locking.LEVEL_NODE)
7389 del self.acquired_locks[locking.LEVEL_NODE]
7391 if self.op.wait_for_sync:
7392 disk_abort = not _WaitForSync(self, iobj)
7393 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7394 # make sure the disks are not degraded (still sync-ing is ok)
7396 feedback_fn("* checking mirrors status")
7397 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
# Degraded disks abort the creation: disks and config entry are torn
# down again (condition on an elided line).
7402 _RemoveDisks(self, iobj)
7403 self.cfg.RemoveInstance(iobj.name)
7404 # Make sure the instance lock gets removed
7405 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7406 raise errors.OpExecError("There are some degraded disks for"
# OS installation/import only applies to non-diskless, non-adopted
# instances; adopted disks keep their existing content.
7409 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7410 if self.op.mode == constants.INSTANCE_CREATE:
7411 if not self.op.no_install:
7412 feedback_fn("* running the instance OS create scripts...")
7413 # FIXME: pass debug option from opcode to backend
7414 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7415 self.op.debug_level)
7416 result.Raise("Could not add os for instance %s"
7417 " on node %s" % (instance, pnode_name))
7419 elif self.op.mode == constants.INSTANCE_IMPORT:
7420 feedback_fn("* running the instance OS import scripts...")
# Build one DiskTransfer per export image (False entries skipped on
# elided lines), then move the data from the source node.
7424 for idx, image in enumerate(self.src_images):
7428 # FIXME: pass debug option from opcode to backend
7429 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7430 constants.IEIO_FILE, (image, ),
7431 constants.IEIO_SCRIPT,
7432 (iobj.disks[idx], idx),
7434 transfers.append(dt)
7437 masterd.instance.TransferInstanceData(self, feedback_fn,
7438 self.op.src_node, pnode_name,
7439 self.pnode.secondary_ip,
# Partial import failures are warnings, not hard errors.
7441 if not compat.all(import_result):
7442 self.LogWarning("Some disks for instance %s on node %s were not"
7443 " imported successfully" % (instance, pnode_name))
7445 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7446 feedback_fn("* preparing remote import...")
7447 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7448 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7450 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7451 self.source_x509_ca,
7452 self._cds, timeouts)
7453 if not compat.all(disk_results):
7454 # TODO: Should the instance still be started, even if some disks
7455 # failed to import (valid for local imports, too)?
7456 self.LogWarning("Some disks for instance %s on node %s were not"
7457 " imported successfully" % (instance, pnode_name))
7459 # Run rename script on newly imported instance
7460 assert iobj.name == instance
7461 feedback_fn("Running rename script for %s" % instance)
7462 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7463 self.source_instance_name,
7464 self.op.debug_level)
# Rename failures are non-fatal (condition on an elided line).
7466 self.LogWarning("Failed to run rename script for %s on node"
7467 " %s: %s" % (instance, pnode_name, result.fail_msg))
7470 # also checked in the prereq part
7471 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
# Final step (guarded by self.op.start on an elided line): mark the
# instance up in the config and start it on the primary node.
7475 iobj.admin_up = True
7476 self.cfg.Update(iobj, feedback_fn)
7477 logging.info("Starting instance %s on node %s", instance, pnode_name)
7478 feedback_fn("* starting instance...")
7479 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7480 result.Raise("Could not start instance")
7482 return list(iobj.all_nodes)
7485 class LUConnectConsole(NoHooksLU):
7486 """Connect to an instance's console.
7488 This is somewhat special in that it returns the command line that
7489 you need to run on the master node in order to connect to the
7498 def ExpandNames(self):
7499 self._ExpandAndLockInstance()
7501 def CheckPrereq(self):
7502 """Check prerequisites.
7504 This checks that the instance is in the cluster.
7507 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7508 assert self.instance is not None, \
7509 "Cannot retrieve locked instance %s" % self.op.instance_name
7510 _CheckNodeOnline(self, self.instance.primary_node)
7512 def Exec(self, feedback_fn):
7513 """Connect to the console of an instance
# Returns the ssh command line to run on the master; the caller (CLI)
# executes it, this LU never opens the console itself.
7516 instance = self.instance
7517 node = instance.primary_node
# Verify the instance is actually running on its primary node before
# handing out a console command.
7519 node_insts = self.rpc.call_instance_list([node],
7520 [instance.hypervisor])[node]
7521 node_insts.Raise("Can't get node information from %s" % node)
7523 if instance.name not in node_insts.payload:
7524 raise errors.OpExecError("Instance %s is not running." % instance.name)
7526 logging.debug("Connecting to console of %s on %s", instance.name, node)
7528 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7529 cluster = self.cfg.GetClusterInfo()
7530 # beparams and hvparams are passed separately, to avoid editing the
7531 # instance and then saving the defaults in the instance itself.
7532 hvparams = cluster.FillHV(instance)
7533 beparams = cluster.FillBE(instance)
7534 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
# Wrapped in ssh so it runs on the instance's primary node; tty=True
# because a console is interactive.
7537 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7540 class LUReplaceDisks(LogicalUnit):
7541   """Replace the disks of an instance.
# NOTE(review): this listing is elided; the _OP_PARAMS list opener and some
# surrounding lines are not visible here.
7544   HPATH = "mirrors-replace"
7545   HTYPE = constants.HTYPE_INSTANCE
7548     ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7549     ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7550     ("remote_node", None, _TMaybeString),
7551     ("iallocator", None, _TMaybeString),
7552     ("early_release", False, _TBool),
7556   def CheckArguments(self):
# Parameter-combination validation is delegated to the tasklet's helper.
7557     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7560   def ExpandNames(self):
7561     self._ExpandAndLockInstance()
7563     if self.op.iallocator is not None:
# With an iallocator we cannot know the new node in advance, so all
# node locks are taken.
7564       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7566     elif self.op.remote_node is not None:
7567       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7568       self.op.remote_node = remote_node
7570       # Warning: do not remove the locking of the new secondary here
7571       # unless DRBD8.AddChildren is changed to work in parallel;
7572       # currently it doesn't since parallel invocations of
7573       # FindUnusedMinor will conflict
7574       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7575       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
# (elided branch) otherwise only the instance's own nodes get locked,
# computed later in DeclareLocks via _LockInstancesNodes.
7578       self.needed_locks[locking.LEVEL_NODE] = []
7579       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7581     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7582                                    self.op.iallocator, self.op.remote_node,
7583                                    self.op.disks, False, self.op.early_release)
7585     self.tasklets = [self.replacer]
7587   def DeclareLocks(self, level):
7588     # If we're not already locking all nodes in the set we have to declare the
7589     # instance's primary/secondary nodes.
7590     if (level == locking.LEVEL_NODE and
7591         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7592       self._LockInstancesNodes()
7594   def BuildHooksEnv(self):
7597     This runs on the master, the primary and all the secondaries.
# NOTE(review): the env dict opener and the final return of (env, nl, nl)
# are elided from this listing.
7600     instance = self.replacer.instance
7602       "MODE": self.op.mode,
7603       "NEW_SECONDARY": self.op.remote_node,
7604       "OLD_SECONDARY": instance.secondary_nodes[0],
7606     env.update(_BuildInstanceHookEnvByObject(self, instance))
7608       self.cfg.GetMasterNode(),
7609       instance.primary_node,
7611     if self.op.remote_node is not None:
7612       nl.append(self.op.remote_node)
7616 class TLReplaceDisks(Tasklet):
7617   """Replaces disks for an instance.
7619   Note: Locking is not within the scope of this class.
# NOTE(review): this listing is elided (gaps in embedded line numbers);
# several statements of this class are not visible here.
7622   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7623                disks, delay_iallocator, early_release):
7624     """Initializes this class.
# Store the raw parameters; actual validation happens in CheckPrereq /
# _CheckPrereq2 once the configuration can be consulted.
7627     Tasklet.__init__(self, lu)
7630     self.instance_name = instance_name
7632     self.iallocator_name = iallocator_name
7633     self.remote_node = remote_node
7635     self.delay_iallocator = delay_iallocator
7636     self.early_release = early_release
# Runtime data, filled in during CheckPrereq/_CheckPrereq2.
7639     self.instance = None
7640     self.new_node = None
7641     self.target_node = None
7642     self.other_node = None
7643     self.remote_node_info = None
7644     self.node_secondary_ip = None
# Presumably decorated @staticmethod (decorator line elided) — note the
# missing "self" parameter; called as TLReplaceDisks.CheckArguments(...).
7647   def CheckArguments(mode, remote_node, iallocator):
7648     """Helper function for users of this class.
7651     # check for valid parameter combination
7652     if mode == constants.REPLACE_DISK_CHG:
7653       if remote_node is None and iallocator is None:
7654         raise errors.OpPrereqError("When changing the secondary either an"
7655                                    " iallocator script must be used or the"
7656                                    " new node given", errors.ECODE_INVAL)
7658       if remote_node is not None and iallocator is not None:
7659         raise errors.OpPrereqError("Give either the iallocator or the new"
7660                                    " secondary, not both", errors.ECODE_INVAL)
7662     elif remote_node is not None or iallocator is not None:
7663       # Not replacing the secondary
7664       raise errors.OpPrereqError("The iallocator and new node options can"
7665                                  " only be used when changing the"
7666                                  " secondary node", errors.ECODE_INVAL)
# Presumably also a @staticmethod (decorator elided) — takes "lu", not
# "self"; returns the name of the new secondary chosen by the iallocator.
7669   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7670     """Compute a new secondary node using an IAllocator.
7673     ial = IAllocator(lu.cfg, lu.rpc,
7674                      mode=constants.IALLOCATOR_MODE_RELOC,
7676                      relocate_from=relocate_from)
7678     ial.Run(iallocator_name)
# (elided) a success check on ial precedes this raise.
7681       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7682                                  " %s" % (iallocator_name, ial.info),
7685     if len(ial.result) != ial.required_nodes:
7686       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7687                                  " of nodes (%s), required %s" %
7689                                   len(ial.result), ial.required_nodes),
7692     remote_node_name = ial.result[0]
7694     lu.LogInfo("Selected new secondary for instance '%s': %s",
7695                instance_name, remote_node_name)
7697     return remote_node_name
# Thin wrapper; the node_name argument is forwarded on an elided line.
7699   def _FindFaultyDisks(self, node_name):
7700     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7703   def CheckPrereq(self):
7704     """Check prerequisites.
7706     This checks that the instance is in the cluster.
7709     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7710     assert instance is not None, \
7711       "Cannot retrieve locked instance %s" % self.instance_name
# Disk replacement only makes sense for DRBD8 instances with exactly
# one secondary node.
7713     if instance.disk_template != constants.DT_DRBD8:
7714       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7715                                  " instances", errors.ECODE_INVAL)
7717     if len(instance.secondary_nodes) != 1:
7718       raise errors.OpPrereqError("The instance has a strange layout,"
7719                                  " expected one secondary but found %d" %
7720                                  len(instance.secondary_nodes),
7723     if not self.delay_iallocator:
7724       self._CheckPrereq2()
7726   def _CheckPrereq2(self):
7727     """Check prerequisites, second part.
7729     This function should always be part of CheckPrereq. It was separated and is
7730     now called from Exec because during node evacuation iallocator was only
7731     called with an unmodified cluster model, not taking planned changes into
7735     instance = self.instance
7736     secondary_node = instance.secondary_nodes[0]
7738     if self.iallocator_name is None:
7739       remote_node = self.remote_node
# (elided else) otherwise the iallocator picks the new secondary.
7741       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7742                                        instance.name, instance.secondary_nodes)
7744     if remote_node is not None:
7745       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7746       assert self.remote_node_info is not None, \
7747         "Cannot retrieve locked node %s" % remote_node
7749       self.remote_node_info = None
# The new secondary may be neither the primary nor the current secondary.
7751     if remote_node == self.instance.primary_node:
7752       raise errors.OpPrereqError("The specified node is the primary node of"
7753                                  " the instance.", errors.ECODE_INVAL)
7755     if remote_node == secondary_node:
7756       raise errors.OpPrereqError("The specified node is already the"
7757                                  " secondary node of the instance.",
# An explicit disk list is only valid for PRI/SEC modes; AUTO computes
# the faulty disks itself and CHG always acts on all disks.
7760     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7761                                     constants.REPLACE_DISK_CHG):
7762       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7765     if self.mode == constants.REPLACE_DISK_AUTO:
7766       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7767       faulty_secondary = self._FindFaultyDisks(secondary_node)
7769       if faulty_primary and faulty_secondary:
7770         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7771                                    " one node and can not be repaired"
7772                                    " automatically" % self.instance_name,
# (elided elif faulty_primary) — replace on the primary side.
7776         self.disks = faulty_primary
7777         self.target_node = instance.primary_node
7778         self.other_node = secondary_node
7779         check_nodes = [self.target_node, self.other_node]
7780       elif faulty_secondary:
7781         self.disks = faulty_secondary
7782         self.target_node = secondary_node
7783         self.other_node = instance.primary_node
7784         check_nodes = [self.target_node, self.other_node]
7790       # Non-automatic modes
7791       if self.mode == constants.REPLACE_DISK_PRI:
7792         self.target_node = instance.primary_node
7793         self.other_node = secondary_node
7794         check_nodes = [self.target_node, self.other_node]
7796       elif self.mode == constants.REPLACE_DISK_SEC:
7797         self.target_node = secondary_node
7798         self.other_node = instance.primary_node
7799         check_nodes = [self.target_node, self.other_node]
7801       elif self.mode == constants.REPLACE_DISK_CHG:
7802         self.new_node = remote_node
7803         self.other_node = instance.primary_node
7804         self.target_node = secondary_node
7805         check_nodes = [self.new_node, self.other_node]
7807         _CheckNodeNotDrained(self.lu, remote_node)
7809         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7810         assert old_node_info is not None
7811         if old_node_info.offline and not self.early_release:
7812           # doesn't make sense to delay the release
7813           self.early_release = True
7814           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7815                           " early-release mode", secondary_node)
# (elided else) any other mode is a programming error.
7818         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7821     # If not specified all disks should be replaced
7823       self.disks = range(len(self.instance.disks))
7825     for node in check_nodes:
7826       _CheckNodeOnline(self.lu, node)
7828     # Check whether disks are valid
7829     for disk_idx in self.disks:
7830       instance.FindDisk(disk_idx)
7832     # Get secondary node IP addresses
7835     for node_name in [self.target_node, self.other_node, self.new_node]:
7836       if node_name is not None:
7837         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7839     self.node_secondary_ip = node_2nd_ip
7841   def Exec(self, feedback_fn):
7842     """Execute disk replacement.
7844     This dispatches the disk replacement to the appropriate handler.
7847     if self.delay_iallocator:
7848       self._CheckPrereq2()
# (elided) an early-return path when no disks need replacement.
7851       feedback_fn("No disks need replacement")
7854     feedback_fn("Replacing disk(s) %s for %s" %
7855                 (utils.CommaJoin(self.disks), self.instance.name))
7857     activate_disks = (not self.instance.admin_up)
7859     # Activate the instance disks if we're replacing them on a down instance
7861       _StartInstanceDisks(self.lu, self.instance, True)
# The try/finally wrapping of the dispatch is partly elided here.
7864       # Should we replace the secondary node?
7865       if self.new_node is not None:
7866         fn = self._ExecDrbd8Secondary
# (elided else) disk-only replacement on an existing node.
7868         fn = self._ExecDrbd8DiskOnly
7870       return fn(feedback_fn)
7873       # Deactivate the instance disks if we're replacing them on a
7876         _SafeShutdownInstanceDisks(self.lu, self.instance)
7878   def _CheckVolumeGroup(self, nodes):
# Ensure the cluster's volume group is present on every involved node.
7879     self.lu.LogInfo("Checking volume groups")
7881     vgname = self.cfg.GetVGName()
7883     # Make sure volume group exists on all involved nodes
7884     results = self.rpc.call_vg_list(nodes)
7886       raise errors.OpExecError("Can't list volume groups on the nodes")
7890       res.Raise("Error checking node %s" % node)
7891       if vgname not in res.payload:
7892         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7895   def _CheckDisksExistence(self, nodes):
7896     # Check disk existence
# Only the disks selected for replacement (self.disks) are checked.
7897     for idx, dev in enumerate(self.instance.disks):
7898       if idx not in self.disks:
7902         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7903         self.cfg.SetDiskID(dev, node)
7905         result = self.rpc.call_blockdev_find(node, dev)
7907         msg = result.fail_msg
7908         if msg or not result.payload:
7910             msg = "disk not found"
7911           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7914   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
# Abort if the peer's storage is degraded — replacing disks then would
# risk data loss.
7915     for idx, dev in enumerate(self.instance.disks):
7916       if idx not in self.disks:
7919       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7922       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7924         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7925                                  " replace disks for instance %s" %
7926                                  (node_name, self.instance.name))
7928   def _CreateNewStorage(self, node_name):
# Create fresh data+meta LVs for each selected disk on node_name and
# return an iv_name -> (dev, old_lvs, new_lvs) mapping for later steps.
7929     vgname = self.cfg.GetVGName()
7932     for idx, dev in enumerate(self.instance.disks):
7933       if idx not in self.disks:
7936       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7938       self.cfg.SetDiskID(dev, node_name)
7940       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7941       names = _GenerateUniqueNames(self.lu, lv_names)
7943       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7944                              logical_id=(vgname, names[0]))
# The DRBD meta device has a fixed size of 128 MiB.
7945       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7946                              logical_id=(vgname, names[1]))
7948       new_lvs = [lv_data, lv_meta]
7949       old_lvs = dev.children
7950       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7952       # we pass force_create=True to force the LVM creation
7953       for new_lv in new_lvs:
7954         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7955                         _GetInstanceInfoText(self.instance), False)
7959   def _CheckDevices(self, node_name, iv_names):
# Verify every replaced DRBD device exists and is no longer degraded.
7960     for name, (dev, _, _) in iv_names.iteritems():
7961       self.cfg.SetDiskID(dev, node_name)
7963       result = self.rpc.call_blockdev_find(node_name, dev)
7965       msg = result.fail_msg
7966       if msg or not result.payload:
7968           msg = "disk not found"
7969         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7972       if result.payload.is_degraded:
7973         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7975   def _RemoveOldStorage(self, node_name, iv_names):
# Best-effort cleanup: removal failures only warn, with a manual hint.
7976     for name, (_, old_lvs, _) in iv_names.iteritems():
7977       self.lu.LogInfo("Remove logical volumes for %s" % name)
7980         self.cfg.SetDiskID(lv, node_name)
7982         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7984           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7985                              hint="remove unused LVs manually")
7987   def _ReleaseNodeLock(self, node_name):
7988     """Releases the lock for a given node."""
# NOTE(review): despite the singular parameter name, callers below pass
# lists of node names; glm.release apparently accepts both.
7989     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7991   def _ExecDrbd8DiskOnly(self, feedback_fn):
7992     """Replace a disk on the primary or secondary for DRBD 8.
7994     The algorithm for replace is quite complicated:
7996     1. for each disk to be replaced:
7998       1. create new LVs on the target node with unique names
7999       1. detach old LVs from the drbd device
8000       1. rename old LVs to name_replaced.<time_t>
8001       1. rename new LVs to old LVs
8002       1. attach the new LVs (with the old names now) to the drbd device
8004     1. wait for sync across all devices
8006     1. for each modified disk:
8008       1. remove old LVs (which have the name name_replaces.<time_t>)
8010     Failures are not very well handled.
8015     # Step: check device activation
8016     self.lu.LogStep(1, steps_total, "Check device existence")
8017     self._CheckDisksExistence([self.other_node, self.target_node])
8018     self._CheckVolumeGroup([self.target_node, self.other_node])
8020     # Step: check other node consistency
8021     self.lu.LogStep(2, steps_total, "Check peer consistency")
8022     self._CheckDisksConsistency(self.other_node,
8023                                 self.other_node == self.instance.primary_node,
8026     # Step: create new storage
8027     self.lu.LogStep(3, steps_total, "Allocate new storage")
8028     iv_names = self._CreateNewStorage(self.target_node)
8030     # Step: for each lv, detach+rename*2+attach
8031     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8032     for dev, old_lvs, new_lvs in iv_names.itervalues():
8033       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8035       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8037       result.Raise("Can't detach drbd from local storage on node"
8038                    " %s for device %s" % (self.target_node, dev.iv_name))
8040       #cfg.Update(instance)
8042       # ok, we created the new LVs, so now we know we have the needed
8043       # storage; as such, we proceed on the target node to rename
8044       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8045       # using the assumption that logical_id == physical_id (which in
8046       # turn is the unique_id on that node)
8048       # FIXME(iustin): use a better name for the replaced LVs
8049       temp_suffix = int(time.time())
8050       ren_fn = lambda d, suff: (d.physical_id[0],
8051                                 d.physical_id[1] + "_replaced-%s" % suff)
8053       # Build the rename list based on what LVs exist on the node
8054       rename_old_to_new = []
8055       for to_ren in old_lvs:
8056         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8057         if not result.fail_msg and result.payload:
8059           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8061       self.lu.LogInfo("Renaming the old LVs on the target node")
8062       result = self.rpc.call_blockdev_rename(self.target_node,
8064       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8066       # Now we rename the new LVs to the old LVs
8067       self.lu.LogInfo("Renaming the new LVs on the target node")
8068       rename_new_to_old = [(new, old.physical_id)
8069                            for old, new in zip(old_lvs, new_lvs)]
8070       result = self.rpc.call_blockdev_rename(self.target_node,
8072       result.Raise("Can't rename new LVs on node %s" % self.target_node)
# Update the in-memory disk objects to match the on-node renames.
8074       for old, new in zip(old_lvs, new_lvs):
8075         new.logical_id = old.logical_id
8076         self.cfg.SetDiskID(new, self.target_node)
8078       for disk in old_lvs:
8079         disk.logical_id = ren_fn(disk, temp_suffix)
8080         self.cfg.SetDiskID(disk, self.target_node)
8082       # Now that the new lvs have the old name, we can add them to the device
8083       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8084       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8086       msg = result.fail_msg
# On failure, roll back by removing the freshly created LVs; rollback
# failures only warn.
8088         for new_lv in new_lvs:
8089           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8092             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8093                                hint=("cleanup manually the unused logical"
8095         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8097       dev.children = new_lvs
8099       self.cfg.Update(self.instance, feedback_fn)
8102     if self.early_release:
8103       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8105       self._RemoveOldStorage(self.target_node, iv_names)
8106       # WARNING: we release both node locks here, do not do other RPCs
8107       # than WaitForSync to the primary node
8108       self._ReleaseNodeLock([self.target_node, self.other_node])
8111     # This can fail as the old devices are degraded and _WaitForSync
8112     # does a combined result over all disks, so we don't check its return value
8113     self.lu.LogStep(cstep, steps_total, "Sync devices")
8115     _WaitForSync(self.lu, self.instance)
8117     # Check all devices manually
8118     self._CheckDevices(self.instance.primary_node, iv_names)
8120     # Step: remove old storage
8121     if not self.early_release:
8122       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8124       self._RemoveOldStorage(self.target_node, iv_names)
8126   def _ExecDrbd8Secondary(self, feedback_fn):
8127     """Replace the secondary node for DRBD 8.
8129     The algorithm for replace is quite complicated:
8130       - for all disks of the instance:
8131         - create new LVs on the new node with same names
8132         - shutdown the drbd device on the old secondary
8133         - disconnect the drbd network on the primary
8134         - create the drbd device on the new secondary
8135         - network attach the drbd on the primary, using an artifice:
8136           the drbd code for Attach() will connect to the network if it
8137           finds a device which is connected to the good local disks but
8139       - wait for sync across all devices
8140       - remove all disks from the old secondary
8142     Failures are not very well handled.
8147     # Step: check device activation
8148     self.lu.LogStep(1, steps_total, "Check device existence")
8149     self._CheckDisksExistence([self.instance.primary_node])
8150     self._CheckVolumeGroup([self.instance.primary_node])
8152     # Step: check other node consistency
8153     self.lu.LogStep(2, steps_total, "Check peer consistency")
8154     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8156     # Step: create new storage
8157     self.lu.LogStep(3, steps_total, "Allocate new storage")
8158     for idx, dev in enumerate(self.instance.disks):
8159       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8160                       (self.new_node, idx))
8161       # we pass force_create=True to force LVM creation
8162       for new_lv in dev.children:
8163         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8164                         _GetInstanceInfoText(self.instance), False)
8166     # Step 4: dbrd minors and drbd setups changes
8167     # after this, we must manually remove the drbd minors on both the
8168     # error and the success paths
8169     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8170     minors = self.cfg.AllocateDRBDMinor([self.new_node
8171                                          for dev in self.instance.disks],
8173     logging.debug("Allocated minors %r", minors)
8176     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8177       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8178                       (self.new_node, idx))
8179       # create new devices on new_node; note that we create two IDs:
8180       # one without port, so the drbd will be activated without
8181       # networking information on the new node at this stage, and one
8182       # with network, for the latter activation in step 4
8183       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8184       if self.instance.primary_node == o_node1:
# (elided branches) pick the primary's minor (p_minor) from whichever
# side of the logical_id the primary node occupies.
8187         assert self.instance.primary_node == o_node2, "Three-node instance?"
8190       new_alone_id = (self.instance.primary_node, self.new_node, None,
8191                       p_minor, new_minor, o_secret)
8192       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8193                     p_minor, new_minor, o_secret)
8195       iv_names[idx] = (dev, dev.children, new_net_id)
8196       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8198       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8199                               logical_id=new_alone_id,
8200                               children=dev.children,
# On any creation failure, release the reserved DRBD minors before
# re-raising (the raise itself is on an elided line).
8203         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8204                               _GetInstanceInfoText(self.instance), False)
8205       except errors.GenericError:
8206         self.cfg.ReleaseDRBDMinors(self.instance.name)
8209     # We have new devices, shutdown the drbd on the old secondary
8210     for idx, dev in enumerate(self.instance.disks):
8211       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8212       self.cfg.SetDiskID(dev, self.target_node)
8213       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8215         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8216                            "node: %s" % (idx, msg),
8217                            hint=("Please cleanup this device manually as"
8218                                  " soon as possible"))
8220     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8221     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8222                                                self.node_secondary_ip,
8223                                                self.instance.disks)\
8224                                                [self.instance.primary_node]
8226     msg = result.fail_msg
8228       # detaches didn't succeed (unlikely)
8229       self.cfg.ReleaseDRBDMinors(self.instance.name)
8230       raise errors.OpExecError("Can't detach the disks from the network on"
8231                                " old node: %s" % (msg,))
8233     # if we managed to detach at least one, we update all the disks of
8234     # the instance to point to the new secondary
8235     self.lu.LogInfo("Updating instance configuration")
8236     for dev, _, new_logical_id in iv_names.itervalues():
8237       dev.logical_id = new_logical_id
8238       self.cfg.SetDiskID(dev, self.instance.primary_node)
8240     self.cfg.Update(self.instance, feedback_fn)
8242     # and now perform the drbd attach
8243     self.lu.LogInfo("Attaching primary drbds to new secondary"
8244                     " (standalone => connected)")
8245     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8247                                            self.node_secondary_ip,
8248                                            self.instance.disks,
# Attach failures only warn — the admin is pointed at gnt-instance info.
8251     for to_node, to_result in result.items():
8252       msg = to_result.fail_msg
8254         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8256                            hint=("please do a gnt-instance info to see the"
8257                                  " status of disks"))
8259     if self.early_release:
8260       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8262       self._RemoveOldStorage(self.target_node, iv_names)
8263       # WARNING: we release all node locks here, do not do other RPCs
8264       # than WaitForSync to the primary node
8265       self._ReleaseNodeLock([self.instance.primary_node,
8270     # This can fail as the old devices are degraded and _WaitForSync
8271     # does a combined result over all disks, so we don't check its return value
8272     self.lu.LogStep(cstep, steps_total, "Sync devices")
8274     _WaitForSync(self.lu, self.instance)
8276     # Check all devices manually
8277     self._CheckDevices(self.instance.primary_node, iv_names)
8279     # Step: remove old storage
8280     if not self.early_release:
8281       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8282       self._RemoveOldStorage(self.target_node, iv_names)
8285 class LURepairNodeStorage(NoHooksLU):
8286   """Repairs the volume group on a node.
# NOTE(review): this listing is elided; the _OP_PARAMS opener and some
# surrounding lines are not visible here.
8291     ("storage_type", _NoDefault, _CheckStorageType),
8292     ("name", _NoDefault, _TNonEmptyString),
8293     ("ignore_consistency", False, _TBool),
8297   def CheckArguments(self):
8298     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8300     storage_type = self.op.storage_type
# Only storage types supporting the fix-consistency operation can be
# repaired.
8302     if (constants.SO_FIX_CONSISTENCY not in
8303         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8304       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8305                                  " repaired" % storage_type,
8308   def ExpandNames(self):
8309     self.needed_locks = {
8310       locking.LEVEL_NODE: [self.op.node_name],
8313   def _CheckFaultyDisks(self, instance, node_name):
8314     """Ensure faulty disks abort the opcode or at least warn."""
# With ignore_consistency set, the prereq error is downgraded to a
# warning (re-raise path is on an elided line).
8316       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8318         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8319                                    " node '%s'" % (instance.name, node_name),
8321     except errors.OpPrereqError, err:
8322       if self.op.ignore_consistency:
8323         self.proc.LogWarning(str(err.args[0]))
8327   def CheckPrereq(self):
8328     """Check prerequisites.
8331     # Check whether any instance on this node has faulty disks
8332     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8333       if not inst.admin_up:
# Only running instances are checked; the other nodes of each instance
# (excluding the repaired node itself) are inspected for faulty disks.
8335       check_nodes = set(inst.all_nodes)
8336       check_nodes.discard(self.op.node_name)
8337       for inst_node_name in check_nodes:
8338         self._CheckFaultyDisks(inst, inst_node_name)
8340   def Exec(self, feedback_fn):
8341     feedback_fn("Repairing storage unit '%s' on %s ..." %
8342                 (self.op.name, self.op.node_name))
8344     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8345     result = self.rpc.call_storage_execute(self.op.node_name,
8346                                            self.op.storage_type, st_args,
8348                                            constants.SO_FIX_CONSISTENCY)
8349     result.Raise("Failed to repair storage unit '%s' on %s" %
8350                  (self.op.name, self.op.node_name))
8353 class LUNodeEvacuationStrategy(NoHooksLU):
8354   """Computes the node evacuation strategy.
# NOTE(review): this listing is elided; the _OP_PARAMS opener and some
# surrounding lines are not visible here.
8358     ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8359     ("remote_node", None, _TMaybeString),
8360     ("iallocator", None, _TMaybeString),
8364   def CheckArguments(self):
# Exactly one of iallocator/remote_node must be given.
8365     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8367   def ExpandNames(self):
8368     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8369     self.needed_locks = locks = {}
8370     if self.op.remote_node is None:
# With an iallocator the target node is unknown, so lock everything.
8371       locks[locking.LEVEL_NODE] = locking.ALL_SET
8373       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8374       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8376   def Exec(self, feedback_fn):
8377     if self.op.remote_node is not None:
# Manual mode: every secondary instance on the evacuated nodes moves to
# the given remote node, which must not be any instance's primary.
8379       for node in self.op.nodes:
8380         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8383         if i.primary_node == self.op.remote_node:
8384           raise errors.OpPrereqError("Node %s is the primary node of"
8385                                      " instance %s, cannot use it as"
8387                                      (self.op.remote_node, i.name),
8389         result.append([i.name, self.op.remote_node])
# (elided else) iallocator mode: delegate the placement computation.
8391       ial = IAllocator(self.cfg, self.rpc,
8392                        mode=constants.IALLOCATOR_MODE_MEVAC,
8393                        evac_nodes=self.op.nodes)
8394       ial.Run(self.op.iallocator, validate=True)
8396         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8402 class LUGrowDisk(LogicalUnit):
8403   """Grow a disk of an instance.
# NOTE(review): this listing is elided; HPATH and the _OP_PARAMS opener
# are among the lines not visible here.
8407   HTYPE = constants.HTYPE_INSTANCE
8410     ("disk", _NoDefault, _TInt),
8411     ("amount", _NoDefault, _TInt),
8412     ("wait_for_sync", True, _TBool),
8416   def ExpandNames(self):
8417     self._ExpandAndLockInstance()
8418     self.needed_locks[locking.LEVEL_NODE] = []
8419     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8421   def DeclareLocks(self, level):
8422     if level == locking.LEVEL_NODE:
8423       self._LockInstancesNodes()
8425   def BuildHooksEnv(self):
8428     This runs on the master, the primary and all the secondaries.
# NOTE(review): the env dict opener and the final return are elided.
8432       "DISK": self.op.disk,
8433       "AMOUNT": self.op.amount,
8435     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8436     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8439   def CheckPrereq(self):
8440     """Check prerequisites.
8442     This checks that the instance is in the cluster.
8445     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8446     assert instance is not None, \
8447       "Cannot retrieve locked instance %s" % self.op.instance_name
8448     nodenames = list(instance.all_nodes)
8449     for node in nodenames:
8450       _CheckNodeOnline(self, node)
8452     self.instance = instance
8454     if instance.disk_template not in constants.DTS_GROWABLE:
8455       raise errors.OpPrereqError("Instance's disk layout does not support"
8456                                  " growing.", errors.ECODE_INVAL)
8458     self.disk = instance.FindDisk(self.op.disk)
8460     if instance.disk_template != constants.DT_FILE:
8461       # TODO: check the free disk space for file, when that feature will be
# Ensure every involved node has enough free disk for the grow amount.
8463       _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8465   def Exec(self, feedback_fn):
8466     """Execute disk grow.
8469     instance = self.instance
# Activate only the target disk; growing requires it to be assembled.
8472     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8474       raise errors.OpExecError("Cannot activate block device to grow")
8476     for node in instance.all_nodes:
8477       self.cfg.SetDiskID(disk, node)
8478       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8479       result.Raise("Grow request failed to node %s" % node)
8481     # TODO: Rewrite code to work properly
8482     # DRBD goes into sync mode for a short amount of time after executing the
8483     # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8484     # calling "resize" in sync mode fails. Sleeping for a short amount of
8485     # time is a work-around.
8488     disk.RecordGrow(self.op.amount)
8489     self.cfg.Update(instance, feedback_fn)
8490     if self.op.wait_for_sync:
8491       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8493         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8494                              " status.\nPlease check the instance.")
8495       if not instance.admin_up:
8496         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8497     elif not instance.admin_up:
8498       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8499                            " not supposed to be running because no wait for"
8500                            " sync mode was requested.")
8503 class LUQueryInstanceData(NoHooksLU):
8504 """Query runtime instance data.
8508 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8509 ("static", False, _TBool),
8513 def ExpandNames(self):
8514 self.needed_locks = {}
8515 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8517 if self.op.instances:
8518 self.wanted_names = []
8519 for name in self.op.instances:
8520 full_name = _ExpandInstanceName(self.cfg, name)
8521 self.wanted_names.append(full_name)
8522 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8524 self.wanted_names = None
8525 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8527 self.needed_locks[locking.LEVEL_NODE] = []
8528 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8530 def DeclareLocks(self, level):
8531 if level == locking.LEVEL_NODE:
8532 self._LockInstancesNodes()
8534 def CheckPrereq(self):
8535 """Check prerequisites.
8537 This only checks the optional instance list against the existing names.
8540 if self.wanted_names is None:
8541 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8543 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8544 in self.wanted_names]
8546 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8547 """Returns the status of a block device
8550 if self.op.static or not node:
8553 self.cfg.SetDiskID(dev, node)
8555 result = self.rpc.call_blockdev_find(node, dev)
8559 result.Raise("Can't compute disk status for %s" % instance_name)
8561 status = result.payload
8565 return (status.dev_path, status.major, status.minor,
8566 status.sync_percent, status.estimated_time,
8567 status.is_degraded, status.ldisk_status)
# Build a dict describing one disk: identity fields plus primary/secondary
# status and, recursively, the status of all child devices.
8569 def _ComputeDiskStatus(self, instance, snode, dev):
8570 """Compute block device status.
# For DRBD the secondary node is whichever end of logical_id is not the
# instance's primary node, overriding the snode argument.
8573 if dev.dev_type in constants.LDS_DRBD:
8574 # we change the snode then (otherwise we use the one passed in)
8575 if dev.logical_id[0] == instance.primary_node:
8576 snode = dev.logical_id[1]
# NOTE(review): the matching "else:" (orig. line 8577) is elided here.
8578 snode = dev.logical_id[0]
8580 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
# NOTE(review): the call continuation (orig. line 8581) is elided.
8582 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
# Recurse so the whole device tree (e.g. LVs under DRBD) is reported.
8585 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8586 for child in dev.children]
# NOTE(review): the opening of the returned dict (orig. 8587-8590) is
# elided from this listing.
8591 "iv_name": dev.iv_name,
8592 "dev_type": dev.dev_type,
8593 "logical_id": dev.logical_id,
8594 "physical_id": dev.physical_id,
8595 "pstatus": dev_pstatus,
8596 "sstatus": dev_sstatus,
8597 "children": dev_children,
# Collect per-instance data dicts, keyed by instance name.
8604 def Exec(self, feedback_fn):
8605 """Gather and return data"""
8608 cluster = self.cfg.GetClusterInfo()
8610 for instance in self.wanted_instances:
# Non-static mode: ask the primary node for the live instance state.
8611 if not self.op.static:
8612 remote_info = self.rpc.call_instance_info(instance.primary_node,
8614 instance.hypervisor)
8615 remote_info.Raise("Error checking node %s" % instance.primary_node)
8616 remote_info = remote_info.payload
8617 if remote_info and "state" in remote_info:
# NOTE(review): the up/down decision branches (orig. 8618-8625) are
# elided from this listing; only the "down" assignments remain visible.
8620 remote_state = "down"
8623 if instance.admin_up:
8626 config_state = "down"
# Per-disk status (primary/secondary/children) via _ComputeDiskStatus.
8628 disks = [self._ComputeDiskStatus(instance, None, device)
8629 for device in instance.disks]
# NOTE(review): the opening of the idict literal (orig. 8630-8631) is
# elided from this listing.
8632 "name": instance.name,
8633 "config_state": config_state,
8634 "run_state": remote_state,
8635 "pnode": instance.primary_node,
8636 "snodes": instance.secondary_nodes,
8638 # this happens to be the same format used for hooks
8639 "nics": _NICListToTuple(self, instance.nics),
8640 "disk_template": instance.disk_template,
8642 "hypervisor": instance.hypervisor,
8643 "network_port": instance.network_port,
# Both the instance-level overrides and the cluster-filled effective
# values are exported, for hv, be and os parameters alike.
8644 "hv_instance": instance.hvparams,
8645 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8646 "be_instance": instance.beparams,
8647 "be_actual": cluster.FillBE(instance),
8648 "os_instance": instance.osparams,
8649 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8650 "serial_no": instance.serial_no,
8651 "mtime": instance.mtime,
8652 "ctime": instance.ctime,
8653 "uuid": instance.uuid,
8656 result[instance.name] = idict
# LogicalUnit modifying an instance's parameters: NICs, disks, be/hv/os
# parameters, OS name and disk template.  Runs instance-modify hooks.
8661 class LUSetInstanceParams(LogicalUnit):
8662 """Modifies an instances's parameters.
8665 HPATH = "instance-modify"
8666 HTYPE = constants.HTYPE_INSTANCE
# NOTE(review): the _OP_PARAMS list opening (orig. 8667-8668) is elided
# from this listing; entries are (name, default, type-check) tuples.
8669 ("nics", _EmptyList, _TList),
8670 ("disks", _EmptyList, _TList),
8671 ("beparams", _EmptyDict, _TDict),
8672 ("hvparams", _EmptyDict, _TDict),
8673 ("disk_template", None, _TMaybeString),
8674 ("remote_node", None, _TMaybeString),
8675 ("os_name", None, _TMaybeString),
8676 ("force_variant", False, _TBool),
8677 ("osparams", None, _TOr(_TDict, _TNone)),
# Purely syntactic validation of the opcode arguments (no cluster state is
# consulted here): at least one change requested, well-formed disk and NIC
# modification lists, and mutually consistent options.
8682 def CheckArguments(self):
8683 if not (self.op.nics or self.op.disks or self.op.disk_template or
8684 self.op.hvparams or self.op.beparams or self.op.os_name):
8685 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8687 if self.op.hvparams:
8688 _CheckGlobalHvParams(self.op.hvparams)
# Validate the disk modification list; disk_op is DDM_ADD/DDM_REMOVE or an
# integer index of an existing disk.
8692 for disk_op, disk_dict in self.op.disks:
8693 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8694 if disk_op == constants.DDM_REMOVE:
# NOTE(review): bodies counting add/remove operations (orig. 8695-8699,
# presumably incrementing disk_addremove) are elided from this listing.
8697 elif disk_op == constants.DDM_ADD:
8700 if not isinstance(disk_op, int):
8701 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8702 if not isinstance(disk_dict, dict):
8703 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8704 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8706 if disk_op == constants.DDM_ADD:
# New disks default to read-write mode; size is mandatory.
8707 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8708 if mode not in constants.DISK_ACCESS_SET:
8709 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8711 size = disk_dict.get('size', None)
8713 raise errors.OpPrereqError("Required disk parameter size missing",
# NOTE(review): the try block converting size to int (orig. 8714-8716)
# is elided from this listing.
8717 except (TypeError, ValueError), err:
8718 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8719 str(err), errors.ECODE_INVAL)
8720 disk_dict['size'] = size
8722 # modification of disk
8723 if 'size' in disk_dict:
8724 raise errors.OpPrereqError("Disk size change not possible, use"
8725 " grow-disk", errors.ECODE_INVAL)
8727 if disk_addremove > 1:
8728 raise errors.OpPrereqError("Only one disk add or remove operation"
8729 " supported at a time", errors.ECODE_INVAL)
# Template conversion is exclusive with any other disk change.
8731 if self.op.disks and self.op.disk_template is not None:
8732 raise errors.OpPrereqError("Disk template conversion and other disk"
8733 " changes not supported at the same time",
8736 if self.op.disk_template:
8737 _CheckDiskTemplate(self.op.disk_template)
8738 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8739 self.op.remote_node is None):
8740 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8741 " one requires specifying a secondary node",
# Validate the NIC modification list, same structure as for disks.
8746 for nic_op, nic_dict in self.op.nics:
8747 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8748 if nic_op == constants.DDM_REMOVE:
# NOTE(review): the add/remove counting bodies (orig. 8749-8753) are
# elided from this listing.
8751 elif nic_op == constants.DDM_ADD:
8754 if not isinstance(nic_op, int):
8755 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8756 if not isinstance(nic_dict, dict):
8757 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8758 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8760 # nic_dict should be a dict
# The literal string "none" means "clear the IP"; anything else must be
# a valid IPv4 address.
8761 nic_ip = nic_dict.get('ip', None)
8762 if nic_ip is not None:
8763 if nic_ip.lower() == constants.VALUE_NONE:
8764 nic_dict['ip'] = None
8766 if not netutils.IsValidIP4(nic_ip):
8767 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
# 'bridge' is the legacy spelling of 'link'; they are mutually exclusive.
8770 nic_bridge = nic_dict.get('bridge', None)
8771 nic_link = nic_dict.get('link', None)
8772 if nic_bridge and nic_link:
8773 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8774 " at the same time", errors.ECODE_INVAL)
8775 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8776 nic_dict['bridge'] = None
8777 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8778 nic_dict['link'] = None
8780 if nic_op == constants.DDM_ADD:
8781 nic_mac = nic_dict.get('mac', None)
# New NICs without an explicit MAC get an auto-generated one.
8783 nic_dict['mac'] = constants.VALUE_AUTO
8785 if 'mac' in nic_dict:
8786 nic_mac = nic_dict['mac']
8787 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8788 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8790 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8791 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8792 " modifying an existing nic",
8795 if nic_addremove > 1:
8796 raise errors.OpPrereqError("Only one NIC add or remove operation"
8797 " supported at a time", errors.ECODE_INVAL)
# Lock the instance; node locks are recomputed from it in DeclareLocks.
8799 def ExpandNames(self):
8800 self._ExpandAndLockInstance()
8801 self.needed_locks[locking.LEVEL_NODE] = []
8802 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# Lock the instance's own nodes; for a mirrored-template conversion also
# lock the requested new secondary node.
8804 def DeclareLocks(self, level):
8805 if level == locking.LEVEL_NODE:
8806 self._LockInstancesNodes()
8807 if self.op.disk_template and self.op.remote_node:
8808 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8809 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
# Build the hooks environment reflecting the *new* parameter values
# (memory, vcpus, NIC list) computed in CheckPrereq.
8811 def BuildHooksEnv(self):
8814 This runs on the master, primary and secondaries.
# NOTE(review): the docstring opening/closing and the initialization of
# "args" (orig. 8812-8817) are elided from this listing.
8818 if constants.BE_MEMORY in self.be_new:
8819 args['memory'] = self.be_new[constants.BE_MEMORY]
8820 if constants.BE_VCPUS in self.be_new:
8821 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8822 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8823 # information at all.
# Merge the requested per-index NIC overrides over the current NICs.
8826 nic_override = dict(self.op.nics)
8827 for idx, nic in enumerate(self.instance.nics):
8828 if idx in nic_override:
8829 this_nic_override = nic_override[idx]
# NOTE(review): the "else:" (orig. 8830) is elided here.
8831 this_nic_override = {}
8832 if 'ip' in this_nic_override:
8833 ip = this_nic_override['ip']
# NOTE(review): fallbacks to the current nic values (orig. 8834-8839)
# are elided from this listing.
8836 if 'mac' in this_nic_override:
8837 mac = this_nic_override['mac']
8840 if idx in self.nic_pnew:
8841 nicparams = self.nic_pnew[idx]
8843 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8844 mode = nicparams[constants.NIC_MODE]
8845 link = nicparams[constants.NIC_LINK]
8846 args['nics'].append((ip, mac, mode, link))
# A pending NIC addition is appended; a pending removal drops the last
# entry from the list instead.
8847 if constants.DDM_ADD in nic_override:
8848 ip = nic_override[constants.DDM_ADD].get('ip', None)
8849 mac = nic_override[constants.DDM_ADD]['mac']
8850 nicparams = self.nic_pnew[constants.DDM_ADD]
8851 mode = nicparams[constants.NIC_MODE]
8852 link = nicparams[constants.NIC_LINK]
8853 args['nics'].append((ip, mac, mode, link))
8854 elif constants.DDM_REMOVE in nic_override:
8855 del args['nics'][-1]
8857 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8858 if self.op.disk_template:
8859 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8860 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Validate the requested changes against the live cluster state and
# precompute the new hv/be/os/NIC parameter dicts used by Exec.
8863 def CheckPrereq(self):
8864 """Check prerequisites.
8866 This only checks the instance list against the existing names.
8869 # checking the new params on the primary/secondary nodes
8871 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8872 cluster = self.cluster = self.cfg.GetClusterInfo()
8873 assert self.instance is not None, \
8874 "Cannot retrieve locked instance %s" % self.op.instance_name
8875 pnode = instance.primary_node
8876 nodelist = list(instance.all_nodes)
# OS change: verify the target OS exists on the primary (unless forced).
8879 if self.op.os_name and not self.op.force:
8880 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8881 self.op.force_variant)
8882 instance_os = self.op.os_name
# NOTE(review): the "else:" (orig. 8883) is elided here.
8884 instance_os = instance.os
# Disk template conversion: must be a supported conversion, instance must
# be down, and for mirrored targets the new secondary must be usable.
8886 if self.op.disk_template:
8887 if instance.disk_template == self.op.disk_template:
8888 raise errors.OpPrereqError("Instance already has disk template %s" %
8889 instance.disk_template, errors.ECODE_INVAL)
8891 if (instance.disk_template,
8892 self.op.disk_template) not in self._DISK_CONVERSIONS:
8893 raise errors.OpPrereqError("Unsupported disk template conversion from"
8894 " %s to %s" % (instance.disk_template,
8895 self.op.disk_template),
8897 _CheckInstanceDown(self, instance, "cannot change disk template")
8898 if self.op.disk_template in constants.DTS_NET_MIRROR:
8899 if self.op.remote_node == pnode:
8900 raise errors.OpPrereqError("Given new secondary node %s is the same"
8901 " as the primary node of the instance" %
8902 self.op.remote_node, errors.ECODE_STATE)
8903 _CheckNodeOnline(self, self.op.remote_node)
8904 _CheckNodeNotDrained(self, self.op.remote_node)
8905 disks = [{"size": d.size} for d in instance.disks]
8906 required = _ComputeDiskSize(self.op.disk_template, disks)
8907 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8909 # hvparams processing
8910 if self.op.hvparams:
8911 hv_type = instance.hypervisor
8912 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8913 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8914 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
# Check the syntax locally first, then against all involved nodes.
8917 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8918 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8919 self.hv_new = hv_new # the new actual values
8920 self.hv_inst = i_hvdict # the new dict (without defaults)
# NOTE(review): the "else:" (orig. 8921) is elided here.
8922 self.hv_new = self.hv_inst = {}
8924 # beparams processing
8925 if self.op.beparams:
8926 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8928 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8929 be_new = cluster.SimpleFillBE(i_bedict)
8930 self.be_new = be_new # the new actual values
8931 self.be_inst = i_bedict # the new dict (without defaults)
# NOTE(review): the "else:" (orig. 8932) is elided here.
8933 self.be_new = self.be_inst = {}
8935 # osparams processing
8936 if self.op.osparams:
8937 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8938 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8939 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8940 self.os_inst = i_osdict # the new dict (without defaults)
# NOTE(review): the "else:" (orig. 8941) is elided here.
8942 self.os_new = self.os_inst = {}
# Memory increase: warn (or fail) when nodes cannot accommodate the new
# size, checking secondaries too when auto_balance is set.
8946 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8947 mem_check_list = [pnode]
8948 if be_new[constants.BE_AUTO_BALANCE]:
8949 # either we changed auto_balance to yes or it was from before
8950 mem_check_list.extend(instance.secondary_nodes)
8951 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8952 instance.hypervisor)
8953 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8954 instance.hypervisor)
8955 pninfo = nodeinfo[pnode]
8956 msg = pninfo.fail_msg
# NOTE(review): the "if msg:" guard (orig. 8957) is elided here.
8958 # Assume the primary node is unreachable and go ahead
8959 self.warn.append("Can't get info from primary node %s: %s" %
8961 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8962 self.warn.append("Node data from primary node %s doesn't contain"
8963 " free memory information" % pnode)
8964 elif instance_info.fail_msg:
8965 self.warn.append("Can't get instance runtime information: %s" %
8966 instance_info.fail_msg)
8968 if instance_info.payload:
8969 current_mem = int(instance_info.payload['memory'])
# NOTE(review): the "else:" (orig. 8970) is elided here.
8971 # Assume instance not running
8972 # (there is a slight race condition here, but it's not very probable,
8973 # and we have no other way to check)
# Fail if the primary cannot fit the memory increase on top of what the
# instance already uses.
8975 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8976 pninfo.payload['memory_free'])
8978 raise errors.OpPrereqError("This change will prevent the instance"
8979 " from starting, due to %d MB of memory"
8980 " missing on its primary node" % miss_mem,
8983 if be_new[constants.BE_AUTO_BALANCE]:
8984 for node, nres in nodeinfo.items():
8985 if node not in instance.secondary_nodes:
# NOTE(review): the "continue" and failure guards (orig. 8986-8988)
# are elided from this listing.
8989 self.warn.append("Can't get info from secondary node %s: %s" %
8991 elif not isinstance(nres.payload.get('memory_free', None), int):
8992 self.warn.append("Secondary node %s didn't return free"
8993 " memory information" % node)
8994 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8995 self.warn.append("Not enough memory to failover instance to"
8996 " secondary node %s" % node)
# Per-NIC validation: index bounds, parameter merging and MAC handling.
9001 for nic_op, nic_dict in self.op.nics:
9002 if nic_op == constants.DDM_REMOVE:
9003 if not instance.nics:
9004 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9007 if nic_op != constants.DDM_ADD:
9009 if not instance.nics:
9010 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9011 " no NICs" % nic_op,
9013 if nic_op < 0 or nic_op >= len(instance.nics):
9014 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9016 (nic_op, len(instance.nics) - 1),
9018 old_nic_params = instance.nics[nic_op].nicparams
9019 old_nic_ip = instance.nics[nic_op].ip
# NOTE(review): defaults for the DDM_ADD case (orig. 9020-9023) are
# elided from this listing.
9024 update_params_dict = dict([(key, nic_dict[key])
9025 for key in constants.NICS_PARAMETERS
9026 if key in nic_dict])
# Legacy 'bridge' is folded into the 'link' parameter.
9028 if 'bridge' in nic_dict:
9029 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9031 new_nic_params = _GetUpdatedParams(old_nic_params,
9033 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9034 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9035 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9036 self.nic_pinst[nic_op] = new_nic_params
9037 self.nic_pnew[nic_op] = new_filled_nic_params
9038 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
# Bridged mode: verify the bridge exists on the primary; failure is a
# warning under --force, an error otherwise.
9040 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9041 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9042 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9044 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9046 self.warn.append(msg)
9048 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9049 if new_nic_mode == constants.NIC_MODE_ROUTED:
9050 if 'ip' in nic_dict:
9051 nic_ip = nic_dict['ip']
# NOTE(review): the fallback/None check (orig. 9052-9054) is elided.
9055 raise errors.OpPrereqError('Cannot set the nic ip to None'
9056 ' on a routed nic', errors.ECODE_INVAL)
9057 if 'mac' in nic_dict:
9058 nic_mac = nic_dict['mac']
# NOTE(review): the "if nic_mac is None:" guard (orig. 9059) is elided.
9060 raise errors.OpPrereqError('Cannot set the nic mac to None',
9062 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9063 # otherwise generate the mac
9064 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9066 # or validate/reserve the current one
# Reserving the MAC guards against races with concurrent jobs.
9068 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9069 except errors.ReservationError:
9070 raise errors.OpPrereqError("MAC address %s already in use"
9071 " in cluster" % nic_mac,
9072 errors.ECODE_NOTUNIQUE)
# Disk change validation: no disk ops on diskless instances, keep at least
# one disk, respect MAX_DISKS, indices in range.
9075 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9076 raise errors.OpPrereqError("Disk operations not supported for"
9077 " diskless instances",
9079 for disk_op, _ in self.op.disks:
9080 if disk_op == constants.DDM_REMOVE:
9081 if len(instance.disks) == 1:
9082 raise errors.OpPrereqError("Cannot remove the last disk of"
9083 " an instance", errors.ECODE_INVAL)
9084 _CheckInstanceDown(self, instance, "cannot remove disks")
# NOTE(review): this compares len(instance.nics) against MAX_DISKS --
# looks like a bug (should presumably be instance.disks); confirm
# against upstream before relying on it.
9086 if (disk_op == constants.DDM_ADD and
9087 len(instance.nics) >= constants.MAX_DISKS):
9088 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9089 " add more" % constants.MAX_DISKS,
9091 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9093 if disk_op < 0 or disk_op >= len(instance.disks):
9094 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9096 (disk_op, len(instance.disks)),
# Convert a plain (LVM) instance to DRBD8: generate the new disk tree,
# create the missing LVs, rename the originals into place as DRBD data
# devices, assemble the DRBD devices and wait for sync.
9101 def _ConvertPlainToDrbd(self, feedback_fn):
9102 """Converts an instance from plain to drbd.
9105 feedback_fn("Converting template to drbd")
9106 instance = self.instance
9107 pnode = instance.primary_node
9108 snode = self.op.remote_node
9110 # create a fake disk info for _GenerateDiskTemplate
9111 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9112 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9113 instance.name, pnode, [snode],
9114 disk_info, None, None, 0)
9115 info = _GetInstanceInfoText(instance)
9116 feedback_fn("Creating aditional volumes...")
9117 # first, create the missing data and meta devices
9118 for disk in new_disks:
9119 # unfortunately this is... not too nice
# On the primary only the meta device (children[1]) is new; on the
# secondary every child device must be created.
9120 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9122 for child in disk.children:
9123 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9124 # at this stage, all new LVs have been created, we can rename the
9126 feedback_fn("Renaming original volumes...")
9127 rename_list = [(o, n.children[0].logical_id)
9128 for (o, n) in zip(instance.disks, new_disks)]
9129 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9130 result.Raise("Failed to rename original LVs")
9132 feedback_fn("Initializing DRBD devices...")
9133 # all child devices are in place, we can now create the DRBD devices
9134 for disk in new_disks:
9135 for node in [pnode, snode]:
9136 f_create = node == pnode
9137 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9139 # at this point, the instance has been modified
9140 instance.disk_template = constants.DT_DRBD8
9141 instance.disks = new_disks
9142 self.cfg.Update(instance, feedback_fn)
9144 # disks are created, waiting for sync
9145 disk_abort = not _WaitForSync(self, instance)
# NOTE(review): the "if disk_abort:" guard (orig. 9146) is elided here.
9147 raise errors.OpExecError("There are some degraded disks for"
9148 " this instance, please cleanup manually")
# Convert a DRBD8 instance back to plain: keep only the data LVs (first
# child of each DRBD device), then delete the secondary copies and the
# meta volumes on the primary.
9150 def _ConvertDrbdToPlain(self, feedback_fn):
9151 """Converts an instance from drbd to plain.
9154 instance = self.instance
# Only single-secondary DRBD layouts are convertible.
9155 assert len(instance.secondary_nodes) == 1
9156 pnode = instance.primary_node
9157 snode = instance.secondary_nodes[0]
9158 feedback_fn("Converting template to plain")
9160 old_disks = instance.disks
9161 new_disks = [d.children[0] for d in old_disks]
9163 # copy over size and mode
9164 for parent, child in zip(old_disks, new_disks):
9165 child.size = parent.size
9166 child.mode = parent.mode
9168 # update instance structure
9169 instance.disks = new_disks
9170 instance.disk_template = constants.DT_PLAIN
9171 self.cfg.Update(instance, feedback_fn)
# Cleanup failures below are logged as warnings, not fatal errors, since
# the instance configuration has already been switched.
9173 feedback_fn("Removing volumes on the secondary node...")
9174 for disk in old_disks:
9175 self.cfg.SetDiskID(disk, snode)
9176 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
# NOTE(review): the "if msg:" guard (orig. 9177) is elided here.
9178 self.LogWarning("Could not remove block device %s on node %s,"
9179 " continuing anyway: %s", disk.iv_name, snode, msg)
9181 feedback_fn("Removing unneeded volumes on the primary node...")
9182 for idx, disk in enumerate(old_disks):
9183 meta = disk.children[1]
9184 self.cfg.SetDiskID(meta, pnode)
9185 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
# NOTE(review): the "if msg:" guard (orig. 9186) is elided here.
9187 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9188 " continuing anyway: %s", idx, pnode, msg)
# Apply the validated changes; returns a list of (parameter, new value)
# pairs describing what was modified.
9191 def Exec(self, feedback_fn):
9192 """Modifies an instance.
9194 All parameters take effect only at the next restart of the instance.
9197 # Process here the warnings from CheckPrereq, as we don't have a
9198 # feedback_fn there.
9199 for warn in self.warn:
9200 feedback_fn("WARNING: %s" % warn)
9203 instance = self.instance
# Disk changes: remove always drops the *last* disk, add appends one.
9205 for disk_op, disk_dict in self.op.disks:
9206 if disk_op == constants.DDM_REMOVE:
9207 # remove the last disk
9208 device = instance.disks.pop()
9209 device_idx = len(instance.disks)
9210 for node, disk in device.ComputeNodeTree(instance.primary_node):
9211 self.cfg.SetDiskID(disk, node)
9212 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
# NOTE(review): the "if msg:" guard (orig. 9213) is elided here.
9214 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9215 " continuing anyway", device_idx, node, msg)
9216 result.append(("disk/%d" % device_idx, "remove"))
9217 elif disk_op == constants.DDM_ADD:
# File-based instances store all disks under one directory, derived
# from the first disk's path.
9219 if instance.disk_template == constants.DT_FILE:
9220 file_driver, file_path = instance.disks[0].logical_id
9221 file_path = os.path.dirname(file_path)
# NOTE(review): the "else:" (orig. 9222) is elided here.
9223 file_driver = file_path = None
9224 disk_idx_base = len(instance.disks)
9225 new_disk = _GenerateDiskTemplate(self,
9226 instance.disk_template,
9227 instance.name, instance.primary_node,
9228 instance.secondary_nodes,
# NOTE(review): remaining call arguments (orig. 9229-9232) are elided
# from this listing.
9233 instance.disks.append(new_disk)
9234 info = _GetInstanceInfoText(instance)
9236 logging.info("Creating volume %s for instance %s",
9237 new_disk.iv_name, instance.name)
9238 # Note: this needs to be kept in sync with _CreateDisks
9240 for node in instance.all_nodes:
9241 f_create = node == instance.primary_node
# Creation failures are warnings only; the config change stands.
9243 _CreateBlockDev(self, node, instance, new_disk,
9244 f_create, info, f_create)
9245 except errors.OpExecError, err:
9246 self.LogWarning("Failed to create volume %s (%s) on"
9248 new_disk.iv_name, new_disk, node, err)
9249 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9250 (new_disk.size, new_disk.mode)))
9252 # change a given disk
9253 instance.disks[disk_op].mode = disk_dict['mode']
9254 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
# Disk template conversion, dispatched through _DISK_CONVERSIONS.
9256 if self.op.disk_template:
9257 r_shut = _ShutdownInstanceDisks(self, instance)
# NOTE(review): the "if not r_shut:" guard (orig. 9258) is elided here.
9259 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9260 " proceed with disk template conversion")
9261 mode = (instance.disk_template, self.op.disk_template)
9263 self._DISK_CONVERSIONS[mode](self, feedback_fn)
# NOTE(review): the except clause around the conversion (orig. 9264)
# is elided; DRBD minors are released on that error path.
9265 self.cfg.ReleaseDRBDMinors(instance.name)
9267 result.append(("disk_template", self.op.disk_template))
# NIC changes, mirroring the disk logic above.
9270 for nic_op, nic_dict in self.op.nics:
9271 if nic_op == constants.DDM_REMOVE:
9272 # remove the last nic
9273 del instance.nics[-1]
9274 result.append(("nic.%d" % len(instance.nics), "remove"))
9275 elif nic_op == constants.DDM_ADD:
9276 # mac and bridge should be set, by now
9277 mac = nic_dict['mac']
9278 ip = nic_dict.get('ip', None)
9279 nicparams = self.nic_pinst[constants.DDM_ADD]
9280 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9281 instance.nics.append(new_nic)
9282 result.append(("nic.%d" % (len(instance.nics) - 1),
9283 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9284 (new_nic.mac, new_nic.ip,
9285 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9286 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
# Modification of an existing NIC by index.
9289 for key in 'mac', 'ip':
# NOTE(review): the "if key in nic_dict:" guard (orig. 9290) is
# elided here.
9291 setattr(instance.nics[nic_op], key, nic_dict[key])
9292 if nic_op in self.nic_pinst:
9293 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9294 for key, val in nic_dict.iteritems():
9295 result.append(("nic.%s/%d" % (key, nic_op), val))
9298 if self.op.hvparams:
9299 instance.hvparams = self.hv_inst
9300 for key, val in self.op.hvparams.iteritems():
9301 result.append(("hv/%s" % key, val))
9304 if self.op.beparams:
9305 instance.beparams = self.be_inst
9306 for key, val in self.op.beparams.iteritems():
9307 result.append(("be/%s" % key, val))
# NOTE(review): the "if self.op.os_name:" guard (orig. ~9310) is elided.
9311 instance.os = self.op.os_name
9314 if self.op.osparams:
9315 instance.osparams = self.os_inst
9316 for key, val in self.op.osparams.iteritems():
9317 result.append(("os/%s" % key, val))
# Persist all accumulated changes in one config update.
9319 self.cfg.Update(instance, feedback_fn)
# Dispatch table of supported disk template conversions:
# (source template, target template) -> conversion method.
9323 _DISK_CONVERSIONS = {
9324 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9325 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
# NoHooksLU listing the instance exports present on a set of nodes.
# NOTE(review): the docstring terminator and _OP_PARAMS opening (orig.
# 9331-9333) are elided from this listing.
9329 class LUQueryExports(NoHooksLU):
9330 """Query the exports list
9334 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9335 ("use_locking", False, _TBool),
# Share-lock either the explicitly named nodes or all of them.
9339 def ExpandNames(self):
9340 self.needed_locks = {}
9341 self.share_locks[locking.LEVEL_NODE] = 1
9342 if not self.op.nodes:
9343 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
# NOTE(review): the "else:" (orig. 9344) is elided here.
9345 self.needed_locks[locking.LEVEL_NODE] = \
9346 _GetWantedNodes(self, self.op.nodes)
9348 def Exec(self, feedback_fn):
9349 """Compute the list of all the exported system images.
9352 @return: a dictionary with the structure node->(export-list)
9353 where export-list is a list of the instances exported on
# Query all locked nodes in one RPC; unreachable nodes map to False.
9357 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9358 rpcresult = self.rpc.call_export_list(self.nodes)
9360 for node in rpcresult:
9361 if rpcresult[node].fail_msg:
9362 result[node] = False
# NOTE(review): the "else:" (orig. 9363) is elided here.
9364 result[node] = rpcresult[node].payload
# NoHooksLU that prepares an instance export (generating the X509
# certificate for remote exports) and returns the data the client needs.
9369 class LUPrepareExport(NoHooksLU):
9370 """Prepares an instance for an export and returns useful information.
9375 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
# Only the instance itself needs to be locked.
9379 def ExpandNames(self):
9380 self._ExpandAndLockInstance()
9382 def CheckPrereq(self):
9383 """Check prerequisites.
9386 instance_name = self.op.instance_name
9388 self.instance = self.cfg.GetInstanceInfo(instance_name)
9389 assert self.instance is not None, \
9390 "Cannot retrieve locked instance %s" % self.op.instance_name
9391 _CheckNodeOnline(self, self.instance.primary_node)
# The cluster domain secret signs the export handshake/key-name HMACs.
9393 self._cds = _GetClusterDomainSecret()
# For remote exports: create a fresh X509 certificate on the primary node
# and return the handshake, HMAC-signed key name and signed CA.
9395 def Exec(self, feedback_fn):
9396 """Prepares an instance for an export.
9399 instance = self.instance
9401 if self.op.mode == constants.EXPORT_MODE_REMOTE:
# Salt for the HMAC signatures below.
9402 salt = utils.GenerateSecret(8)
9404 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9405 result = self.rpc.call_x509_cert_create(instance.primary_node,
9406 constants.RIE_CERT_VALIDITY)
9407 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9409 (name, cert_pem) = result.payload
9411 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
# NOTE(review): the call continuation and the return-dict opening
# (orig. 9412-9414) are elided from this listing.
9415 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9416 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9418 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
# LogicalUnit exporting an instance, either to another node in the cluster
# (local mode) or to a remote cluster (remote mode, X509-encrypted).
9424 class LUExportInstance(LogicalUnit):
9425 """Export an instance to an image in the cluster.
9428 HPATH = "instance-export"
9429 HTYPE = constants.HTYPE_INSTANCE
# NOTE(review): the _OP_PARAMS opening (orig. 9430-9431) is elided.
# target_node is a node name in local mode, or a per-disk info list in
# remote mode (see CheckPrereq).
9432 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9433 ("shutdown", True, _TBool),
9435 ("remove_instance", False, _TBool),
9436 ("ignore_remove_failures", False, _TBool),
9437 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9438 ("x509_key_name", None, _TOr(_TList, _TNone)),
9439 ("destination_x509_ca", None, _TMaybeString),
# Syntactic checks: removal implies shutdown, and remote mode requires
# both the X509 key name and the destination CA.
9443 def CheckArguments(self):
9444 """Check the arguments.
9447 self.x509_key_name = self.op.x509_key_name
9448 self.dest_x509_ca_pem = self.op.destination_x509_ca
9450 if self.op.remove_instance and not self.op.shutdown:
9451 raise errors.OpPrereqError("Can not remove instance without shutting it"
9454 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9455 if not self.x509_key_name:
9456 raise errors.OpPrereqError("Missing X509 key name for encryption",
9459 if not self.dest_x509_ca_pem:
9460 raise errors.OpPrereqError("Missing destination X509 CA",
9463 def ExpandNames(self):
9464 self._ExpandAndLockInstance()
9466 # Lock all nodes for local exports
9467 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9468 # FIXME: lock only instance primary and destination node
9470 # Sad but true, for now we have do lock all nodes, as we don't know where
9471 # the previous export might be, and in this LU we search for it and
9472 # remove it from its current node. In the future we could fix this by:
9473 # - making a tasklet to search (share-lock all), then create the
9474 # new one, then one to remove, after
9475 # - removing the removal operation altogether
9476 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9478 def DeclareLocks(self, level):
9479 """Last minute lock declaration."""
9480 # All nodes are locked anyway, so nothing to do here.
# Hooks environment: export mode/target plus the standard instance env.
# The target node is added to the node list only for local exports.
9482 def BuildHooksEnv(self):
9485 This will run on the master, primary node and target node.
# NOTE(review): the docstring delimiters and the "env = {" opening
# (orig. 9483-9488) are elided from this listing.
9489 "EXPORT_MODE": self.op.mode,
9490 "EXPORT_NODE": self.op.target_node,
9491 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9492 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9493 # TODO: Generic function for boolean env variables
9494 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9497 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9499 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9501 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9502 nl.append(self.op.target_node)
# Validate instance and destination: for local mode the target node must
# be online and not drained; for remote mode verify the per-disk target
# info, the HMAC on the X509 key name and the signed destination CA.
9506 def CheckPrereq(self):
9507 """Check prerequisites.
9509 This checks that the instance and node names are valid.
9512 instance_name = self.op.instance_name
9514 self.instance = self.cfg.GetInstanceInfo(instance_name)
9515 assert self.instance is not None, \
9516 "Cannot retrieve locked instance %s" % self.op.instance_name
9517 _CheckNodeOnline(self, self.instance.primary_node)
9519 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9520 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9521 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9522 assert self.dst_node is not None
9524 _CheckNodeOnline(self, self.dst_node.name)
9525 _CheckNodeNotDrained(self, self.dst_node.name)
# Remote-mode attributes are unused in local mode.
9528 self.dest_disk_info = None
9529 self.dest_x509_ca = None
9531 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9532 self.dst_node = None
# In remote mode target_node is a list with one entry per disk.
9534 if len(self.op.target_node) != len(self.instance.disks):
9535 raise errors.OpPrereqError(("Received destination information for %s"
9536 " disks, but instance %s has %s disks") %
9537 (len(self.op.target_node), instance_name,
9538 len(self.instance.disks)),
9541 cds = _GetClusterDomainSecret()
9543 # Check X509 key name
# NOTE(review): the "try:" (orig. 9544) is elided here.
9545 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9546 except (TypeError, ValueError), err:
9547 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9549 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9550 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9553 # Load and verify CA
# NOTE(review): the "try:" (orig. 9554) is elided here.
9555 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9556 except OpenSSL.crypto.Error, err:
9557 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9558 (err, ), errors.ECODE_INVAL)
9560 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9561 if errcode is not None:
9562 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9563 (msg, ), errors.ECODE_INVAL)
9565 self.dest_x509_ca = cert
9567 # Verify target information
# Each entry carries (host, port, magic), HMAC-verified per disk index.
9569 for idx, disk_data in enumerate(self.op.target_node):
9571 (host, port, magic) = \
9572 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9573 except errors.GenericError, err:
9574 raise errors.OpPrereqError("Target info for disk %s: %s" %
9575 (idx, err), errors.ECODE_INVAL)
9577 disk_info.append((host, port, magic))
9579 assert len(disk_info) == len(self.op.target_node)
9580 self.dest_disk_info = disk_info
# NOTE(review): the "else:" for unknown modes (orig. 9581-9582) is
# elided here.
9583 raise errors.ProgrammerError("Unhandled export mode %r" %
9586 # instance disk type verification
9587 # TODO: Implement export support for file-based disks
9588 for disk in self.instance.disks:
9589 if disk.dev_type == constants.LD_FILE:
9590 raise errors.OpPrereqError("Export not supported for instances with"
9591 " file-based disks", errors.ECODE_INVAL)
9593 def _CleanupExports(self, feedback_fn):
9594 """Removes exports of current instance from all other nodes.
9596 If an instance in a cluster with nodes A..D was exported to node C, its
9597 exports will be removed from the nodes A, B and D.
# Only meaningful for local exports (dst_node is None in remote mode).
9600 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9602 nodelist = self.cfg.GetNodeList()
9603 nodelist.remove(self.dst_node.name)
9605 # on one-node clusters nodelist will be empty after the removal
9606 # if we proceed the backup would be removed because OpQueryExports
9607 # substitutes an empty list with the full cluster node list.
9608 iname = self.instance.name
# NOTE(review): the "if nodelist:" guard (orig. 9609) is elided here.
9610 feedback_fn("Removing old exports for instance %s" % iname)
9611 exportlist = self.rpc.call_export_list(nodelist)
9612 for node in exportlist:
9613 if exportlist[node].fail_msg:
# NOTE(review): the "continue" (orig. 9614) is elided here.
9615 if iname in exportlist[node].payload:
# Removal failures are non-fatal; warn and keep going.
9616 msg = self.rpc.call_export_remove(node, iname).fail_msg
9618 self.LogWarning("Could not remove older export for instance %s"
9619 " on node %s: %s", iname, node, msg)
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    Optionally shuts the instance down, snapshots its disks, and either
    dumps them on a destination node (local mode) or streams them to a
    remote destination authenticated via X509 (remote mode).

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    # Disks of a stopped instance are not active and must be activated
    # before they can be snapshotted
    activate_disks = (not instance.admin_up)

      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,

      helper.CreateSnapshots()

        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          # Instance was shut down only for the snapshot; restart it now
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Collect human-readable failure descriptions before raising
    if not (compat.all(dresults) and fin_resu):
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    if not instance_name:
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    for node in exportlist:
      msg = exportlist[node].fail_msg
      # nodes that fail to answer are only warned about, not fatal
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
      if instance_name in exportlist[node].payload:
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        # removal failures are logged but do not abort the operation
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    # Node/instance tags need the target object's lock; cluster tags need none
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the tag target (cluster, node or instance) into C{self.target}.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
    ("pattern", _NoDefault, _TNonEmptyString),

  def ExpandNames(self):
    # Search is read-only over the configuration; no locks are required
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    # Build (path, object) pairs for the cluster, all instances and all nodes
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    # Collect every (path, tag) whose tag matches the compiled pattern
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tags on the target object and saves the configuration.

    """
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", _NoDefault, _TMaybeString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    # all tags to be removed must currently exist on the target
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt)

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    Sleeps on the master and/or on the requested nodes, raising
    L{errors.OpExecError} on any failure.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
class LUTestJobqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
    ("notify_waitlock", False, _TBool),
    ("notify_exec", False, _TBool),
    ("log_messages", _EmptyList, _TListOf(_TString)),
    ("fail", False, _TBool),

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        # Send details to client
        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    # Prereq-phase failures must surface as OpPrereqError, exec-phase
    # failures as OpExecError
      errcls = errors.OpPrereqError
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,

  def CheckArguments(self):
    # Count calls so ExpandNames/Exec can verify the LU life-cycle ordering
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),

  def Exec(self, feedback_fn):
    """Runs the requested job queue tests (notifications, log messages,
    forced failure).

    """
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

      raise errors.OpExecError("Opcode failure was requested")
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    "name", "relocate_from",

  def __init__(self, cfg, rpc, mode, **kwargs):
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.evac_nodes = None
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    # select the expected input keys and the request builder for this mode
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
      # reject parameters that do not belong to the selected mode
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
      # and require every parameter of the selected mode to be present
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cluster_info = cfg.GetClusterInfo()
    # cluster-wide data
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    # pick the hypervisor whose node-side data we need to query
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,

      # dynamic (RPC-based) values are only available for usable nodes
      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
        remote_info = nresult.payload

        # sanity-check the attributes returned by the node RPC
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    for iinfo, beinfo in i_list:
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    # mirrored templates need two nodes, everything else one
    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
      self.required_nodes = 1
      "disk_template": self.disk_template,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    # relocation is only defined for net-mirrored (e.g. DRBD) instances
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
      "evac_nodes": self.evac_nodes

  def _BuildInputData(self, fn):
    """Build input data structures.

    @param fn: mode-specific request builder (one of the _Add* methods)

    """
    self._ComputeClusterData()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    @param name: name of the iallocator script to run
    @param validate: whether to validate the script's output
    @param call_fn: RPC function override, mainly for tests

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]

    # the three mandatory keys become attributes of this object
    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
    self.out_data = rdict
10458 class LUTestAllocator(NoHooksLU):
10459 """Run allocator tests.
10461 This LU runs the allocator tests
10465 ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10466 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10467 ("name", _NoDefault, _TNonEmptyString),
10468 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10469 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10470 _TOr(_TNone, _TNonEmptyString))))),
10471 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10472 ("hypervisor", None, _TMaybeString),
10473 ("allocator", None, _TMaybeString),
10474 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10475 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10476 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10477 ("os", None, _TMaybeString),
10478 ("disk_template", None, _TMaybeString),
10479 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      # allocation tests need the full instance specification
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # each disk must be a dict with an integer size and a 'r'/'w' mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # "out" direction additionally needs the allocator script name
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
10534 def Exec(self, feedback_fn):
10535 """Run the allocator test.
10538 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10539 ial = IAllocator(self.cfg, self.rpc,
10542 mem_size=self.op.mem_size,
10543 disks=self.op.disks,
10544 disk_template=self.op.disk_template,
10548 vcpus=self.op.vcpus,
10549 hypervisor=self.op.hypervisor,
10551 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10552 ial = IAllocator(self.cfg, self.rpc,
10555 relocate_from=list(self.relocate_from),
10557 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10558 ial = IAllocator(self.cfg, self.rpc,
10560 evac_nodes=self.op.evac_nodes)
10562 raise errors.ProgrammerError("Uncatched mode %s in"
10563 " LUTestAllocator.Exec", self.op.mode)
10565 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10566 result = ial.in_text
10568 ial.Run(self.op.allocator, validate=False)
10569 result = ial.out_text