4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 import ganeti.masterd.instance # pylint: disable-msg=W0611
60 # Modifiable default values; need to define these here before the
64 """Returns an empty list.
71 """Returns an empty dict.
77 #: The without-default default value
81 #: The no-type (value too complex to check it in the type system)
87 """Checks if the given value is not None.
90 return val is not None
94 """Checks if the given value is None.
101 """Checks if the given value is a boolean.
104 return isinstance(val, bool)
108 """Checks if the given value is an integer.
111 return isinstance(val, int)
115 """Checks if the given value is a float.
118 return isinstance(val, float)
122 """Checks if the given value is a string.
125 return isinstance(val, basestring)
129 """Checks if a given value evaluates to a boolean True value.
135 def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list.
139 return lambda val: val in target_list
144 """Checks if the given value is a list.
147 return isinstance(val, list)
151 """Checks if the given value is a dictionary.
154 return isinstance(val, dict)
157 def _TIsLength(size):
158 """Check is the given container is of the given size.
161 return lambda container: len(container) == size
166 """Combine multiple functions using an AND operation.
170 return compat.all(t(val) for t in args)
175 """Combine multiple functions using an AND operation.
179 return compat.any(t(val) for t in args)
184 """Checks that a modified version of the argument passes the given test.
187 return lambda val: test(fn(val))
192 #: a non-empty string
193 _TNonEmptyString = _TAnd(_TString, _TTrue)
196 #: a maybe non-empty string
197 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
200 #: a maybe boolean (bool or none)
201 _TMaybeBool = _TOr(_TBool, _TNone)
204 #: a positive integer
205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
207 #: a strictly positive integer
208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
211 def _TListOf(my_type):
212 """Checks if a given value is a list with all elements of the same type.
216 lambda lst: compat.all(my_type(v) for v in lst))
219 def _TDictOf(key_type, val_type):
220 """Checks a dict type for the type of its key/values.
224 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225 and compat.all(val_type(v)
226 for v in my_dict.values())))
229 # Common opcode attributes
231 #: output fields for a query operation
232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
235 #: the shutdown timeout
236 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
239 #: the force parameter
240 _PForce = ("force", False, _TBool)
242 #: a required instance name (for single-instance LUs)
243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
246 #: a required node name (for single-node LUs)
247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
249 #: the migration type (live/non-live)
250 _PMigrationMode = ("mode", None, _TOr(_TNone,
251 _TElemOf(constants.HT_MIGRATION_MODES)))
253 #: the obsolete 'live' mode (boolean)
254 _PMigrationLive = ("live", None, _TMaybeBool)
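# Illustrative sketch (hypothetical LU, not from the original module): the
# parameter triples above are meant to be listed in an LU's _OP_PARAMS, which
# the LogicalUnit constructor below walks to apply defaults and type checks:
#
#   class LUExampleShutdown(LogicalUnit):
#     _OP_PARAMS = [
#       _PInstanceName,                      # required, non-empty string
#       _PShutdownTimeout,                   # gets a default when missing
#       ("ignore_failures", False, _TBool),  # LU-specific parameter
#       ]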
258 class LogicalUnit(object):
259 """Logical Unit base class.
261 Subclasses must follow these rules:
262 - implement ExpandNames
263 - implement CheckPrereq (except when tasklets are used)
264 - implement Exec (except when tasklets are used)
265 - implement BuildHooksEnv
266 - redefine HPATH and HTYPE
267 - optionally redefine their run requirements:
268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
270 Note that all commands require root permissions.
272 @ivar dry_run_result: the value (if any) that will be returned to the caller
273 in dry-run mode (signalled by opcode dry_run parameter)
274 @cvar _OP_PARAMS: a list of opcode attributes, the default values
275 they should get if not already defined, and the types they must match
283 def __init__(self, processor, op, context, rpc):
284 """Constructor for LogicalUnit.
286 This needs to be overridden in derived classes in order to check op
290 self.proc = processor
292 self.cfg = context.cfg
293 self.context = context
295 # Dicts used to declare locking needs to mcpu
296 self.needed_locks = None
297 self.acquired_locks = {}
298 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
300 self.remove_locks = {}
301 # Used to force good behavior when calling helper functions
302 self.recalculate_locks = {}
305 self.Log = processor.Log # pylint: disable-msg=C0103
306 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
307 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
308 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
309 # support for dry-run
310 self.dry_run_result = None
311 # support for generic debug attribute
312 if (not hasattr(self.op, "debug_level") or
313 not isinstance(self.op.debug_level, int)):
314 self.op.debug_level = 0
319 # The new kind-of-type-system
320 op_id = self.op.OP_ID
321 for attr_name, aval, test in self._OP_PARAMS:
322 if not hasattr(op, attr_name):
323 if aval == _NoDefault:
324 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
325 (op_id, attr_name), errors.ECODE_INVAL)
331 setattr(self.op, attr_name, dval)
332 attr_val = getattr(op, attr_name)
336 if not callable(test):
337 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
338 " given type is not a proper type (%s)" %
339 (op_id, attr_name, test))
340 if not test(attr_val):
341 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
342 self.op.OP_ID, attr_name, type(attr_val), attr_val)
343 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
344 (op_id, attr_name), errors.ECODE_INVAL)
346 self.CheckArguments()
349 """Returns the SshRunner object
353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
356 ssh = property(fget=__GetSSH)
358 def CheckArguments(self):
359 """Check syntactic validity for the opcode arguments.
361 This method is for doing a simple syntactic check and ensure
362 validity of opcode parameters, without any cluster-related
363 checks. While the same can be accomplished in ExpandNames and/or
364 CheckPrereq, doing these separately is better because:
366 - ExpandNames is left as purely a lock-related function
367 - CheckPrereq is run after we have acquired locks (and possible
370 The function is allowed to change the self.op attribute so that
371 later methods no longer need to worry about missing parameters.
376 def ExpandNames(self):
377 """Expand names for this LU.
379 This method is called before starting to execute the opcode, and it should
380 update all the parameters of the opcode to their canonical form (e.g. a
381 short node name must be fully expanded after this method has successfully
382 completed). This way locking, hooks, logging, etc. can work correctly.
384 LUs which implement this method must also populate the self.needed_locks
385 member, as a dict with lock levels as keys, and a list of needed lock names
388 - use an empty dict if you don't need any lock
389 - if you don't need any lock at a particular level omit that level
390 - don't put anything for the BGL level
391 - if you want all locks at a level use locking.ALL_SET as a value
393 If you need to share locks (rather than acquire them exclusively) at one
394 level you can modify self.share_locks, setting a true value (usually 1) for
395 that level. By default locks are not shared.
397 This function can also define a list of tasklets, which then will be
398 executed in order instead of the usual LU-level CheckPrereq and Exec
399 functions, if those are not defined by the LU.
403 # Acquire all nodes and one instance
404 self.needed_locks = {
405 locking.LEVEL_NODE: locking.ALL_SET,
406 locking.LEVEL_INSTANCE: ['instance1.example.com'],
408 # Acquire just two nodes
409 self.needed_locks = {
410 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
413 self.needed_locks = {} # No, you can't leave it to the default value None
416 # The implementation of this method is mandatory only if the new LU is
417 # concurrent, so that old LUs don't need to be changed all at the same
420 self.needed_locks = {} # Exclusive LUs don't need locks.
422 raise NotImplementedError
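  # Illustrative sketch (hypothetical code, not from the original module): a
  # concurrent single-instance LU would typically implement ExpandNames along
  # these lines, optionally sharing the node locks:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE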
424 def DeclareLocks(self, level):
425 """Declare LU locking needs for a level
427 While most LUs can just declare their locking needs at ExpandNames time,
428 sometimes there's the need to calculate some locks after having acquired
429 the ones before. This function is called just before acquiring locks at a
430 particular level, but after acquiring the ones at lower levels, and permits
431 such calculations. It can be used to modify self.needed_locks, and by
432 default it does nothing.
434 This function is only called if you have something already set in
435 self.needed_locks for the level.
437 @param level: Locking level which is going to be locked
438 @type level: member of ganeti.locking.LEVELS
442 def CheckPrereq(self):
443 """Check prerequisites for this LU.
445 This method should check that the prerequisites for the execution
446 of this LU are fulfilled. It can do internode communication, but
447 it should be idempotent - no cluster or system changes are
450 The method should raise errors.OpPrereqError in case something is
451 not fulfilled. Its return value is ignored.
453 This method should also update all the parameters of the opcode to
454 their canonical form if it hasn't been done by ExpandNames before.
457 if self.tasklets is not None:
458 for (idx, tl) in enumerate(self.tasklets):
459 logging.debug("Checking prerequisites for tasklet %s/%s",
460 idx + 1, len(self.tasklets))
465 def Exec(self, feedback_fn):
468 This method should implement the actual work. It should raise
469 errors.OpExecError for failures that are somewhat dealt with in
473 if self.tasklets is not None:
474 for (idx, tl) in enumerate(self.tasklets):
475 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
478 raise NotImplementedError
480 def BuildHooksEnv(self):
481 """Build hooks environment for this LU.
483 This method should return a three-element tuple consisting of: a dict
484 containing the environment that will be used for running the
485 specific hook for this LU, a list of node names on which the hook
486 should run before the execution, and a list of node names on which
487 the hook should run after the execution.
489 The keys of the dict must not be prefixed with 'GANETI_', as this will
490 be handled in the hooks runner. Also note additional keys will be
491 added by the hooks runner. If the LU doesn't define any
492 environment, an empty dict (and not None) should be returned.
494 If there are no nodes, an empty list (and not None) should be returned.
496 Note that if the HPATH for a LU class is None, this function will
500 raise NotImplementedError
502 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503 """Notify the LU about the results of its hooks.
505 This method is called every time a hooks phase is executed, and notifies
506 the Logical Unit about the hooks' result. The LU can then use it to alter
507 its result based on the hooks. By default the method does nothing and the
508 previous result is passed back unchanged but any LU can define it if it
509 wants to use the local cluster hook-scripts somehow.
511 @param phase: one of L{constants.HOOKS_PHASE_POST} or
512 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
513 @param hook_results: the results of the multi-node hooks rpc call
514 @param feedback_fn: function used to send feedback back to the caller
515 @param lu_result: the previous Exec result this LU had, or None
517 @return: the new Exec result, based on the previous result
521 # API must be kept, thus we ignore the unused-argument and
522 # could-be-a-function warnings
523 # pylint: disable-msg=W0613,R0201
526 def _ExpandAndLockInstance(self):
527 """Helper function to expand and lock an instance.
529 Many LUs that work on an instance take its name in self.op.instance_name
530 and need to expand it and then declare the expanded name for locking. This
531 function does it, and then updates self.op.instance_name to the expanded
532 name. It also initializes needed_locks as a dict, if this hasn't been done
536 if self.needed_locks is None:
537 self.needed_locks = {}
539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540 "_ExpandAndLockInstance called with instance-level locks set"
541 self.op.instance_name = _ExpandInstanceName(self.cfg,
542 self.op.instance_name)
543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
545 def _LockInstancesNodes(self, primary_only=False):
546 """Helper function to declare instances' nodes for locking.
548 This function should be called after locking one or more instances to lock
549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550 with all primary or secondary nodes for instances already locked and
551 present in self.needed_locks[locking.LEVEL_INSTANCE].
553 It should be called from DeclareLocks, and for safety only works if
554 self.recalculate_locks[locking.LEVEL_NODE] is set.
556 In the future it may grow parameters to just lock some instance's nodes, or
557 to just lock primaries or secondary nodes, if needed.
559 It should be called in DeclareLocks in a way similar to::
561 if level == locking.LEVEL_NODE:
562 self._LockInstancesNodes()
564 @type primary_only: boolean
565 @param primary_only: only lock primary nodes of locked instances
568 assert locking.LEVEL_NODE in self.recalculate_locks, \
569 "_LockInstancesNodes helper function called with no nodes to recalculate"
571 # TODO: check if we've really been called with the instance locks held
573 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
574 # future we might want to have different behaviors depending on the value
575 # of self.recalculate_locks[locking.LEVEL_NODE]
577 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578 instance = self.context.cfg.GetInstanceInfo(instance_name)
579 wanted_nodes.append(instance.primary_node)
581 wanted_nodes.extend(instance.secondary_nodes)
583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
588 del self.recalculate_locks[locking.LEVEL_NODE]
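  # Illustrative sketch (hypothetical code, not from the original module): the
  # usual counterpart of the recalculate_locks pattern is a DeclareLocks that
  # fills in the node locks once the instance locks are held:
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes(primary_only=True)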
591 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592 """Simple LU which runs no hooks.
594 This LU is intended as a parent for other LogicalUnits which will
595 run no hooks, in order to reduce duplicate code.
601 def BuildHooksEnv(self):
602 """Empty BuildHooksEnv for NoHooksLu.
604 This just raises an error.
607 assert False, "BuildHooksEnv called for NoHooksLUs"
611 """Tasklet base class.
613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614 they can mix legacy code with tasklets. Locking needs to be done in the LU,
615 tasklets know nothing about locks.
617 Subclasses must follow these rules:
618 - Implement CheckPrereq
622 def __init__(self, lu):
629 def CheckPrereq(self):
630 """Check prerequisites for this tasklets.
632 This method should check whether the prerequisites for the execution of
633 this tasklet are fulfilled. It can do internode communication, but it
634 should be idempotent - no cluster or system changes are allowed.
636 The method should raise errors.OpPrereqError in case something is not
637 fulfilled. Its return value is ignored.
639 This method should also update all parameters to their canonical form if it
640 hasn't been done before.
645 def Exec(self, feedback_fn):
646 """Execute the tasklet.
648 This method should implement the actual work. It should raise
649 errors.OpExecError for failures that are somewhat dealt with in code, or
653 raise NotImplementedError
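# Illustrative sketch (hypothetical names, not part of the original module):
# an LU built from tasklets only needs to create them in ExpandNames; their
# CheckPrereq and Exec methods are then run in order on the LU's behalf:
#
#   class TLExampleNoop(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("nothing to do")
#
#   # and, inside some LU's ExpandNames:
#   #   self.tasklets = [TLExampleNoop(self)]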
656 def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names.
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
662 @param nodes: list of node names or None for all nodes
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
669 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
670 " non-empty list of nodes whose name is to be expanded.")
672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673 return utils.NiceSort(wanted)
676 def _GetWantedInstances(lu, instances):
677 """Returns list of checked and expanded instance names.
679 @type lu: L{LogicalUnit}
680 @param lu: the logical unit on whose behalf we execute
681 @type instances: list
682 @param instances: list of instance names or None for all instances
684 @return: the list of instances, sorted
685 @raise errors.OpPrereqError: if the instances parameter is wrong type
686 @raise errors.OpPrereqError: if any of the passed instances is not found
690 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
692 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
696 def _GetUpdatedParams(old_params, update_dict,
697 use_default=True, use_none=False):
698 """Return the new version of a parameter dictionary.
700 @type old_params: dict
701 @param old_params: old parameters
702 @type update_dict: dict
703 @param update_dict: dict containing new parameter values, or
704 constants.VALUE_DEFAULT to reset the parameter to its default
706 @type use_default: boolean
707 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
708 values as 'to be deleted' values
709 @type use_none: boolean
710 @param use_none: whether to recognise C{None} values as 'to be
713 @return: the new parameter dictionary
716 params_copy = copy.deepcopy(old_params)
717 for key, val in update_dict.iteritems():
718 if ((use_default and val == constants.VALUE_DEFAULT) or
719 (use_none and val is None)):
725 params_copy[key] = val
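# Illustrative example (invented values, not from the original module) of how
# _GetUpdatedParams merges a parameter dictionary; constants.VALUE_DEFAULT
# drops a key so that the cluster-level default applies again:
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   # -> {"root_path": "/dev/sda1", "serial_console": True}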
729 def _CheckOutputFields(static, dynamic, selected):
730 """Checks whether all selected fields are valid.
732 @type static: L{utils.FieldSet}
733 @param static: static fields set
734 @type dynamic: L{utils.FieldSet}
735 @param dynamic: dynamic fields set
742 delta = f.NonMatching(selected)
744 raise errors.OpPrereqError("Unknown output fields selected: %s"
745 % ",".join(delta), errors.ECODE_INVAL)
748 def _CheckGlobalHvParams(params):
749 """Validates that given hypervisor params are not global ones.
751 This will ensure that instances don't get customised versions of
755 used_globals = constants.HVC_GLOBALS.intersection(params)
757 msg = ("The following hypervisor parameters are global and cannot"
758 " be customized at instance level, please modify them at"
759 " cluster level: %s" % utils.CommaJoin(used_globals))
760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
763 def _CheckNodeOnline(lu, node):
764 """Ensure that a given node is online.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @raise errors.OpPrereqError: if the node is offline
771 if lu.cfg.GetNodeInfo(node).offline:
772 raise errors.OpPrereqError("Can't use offline node %s" % node,
776 def _CheckNodeNotDrained(lu, node):
777 """Ensure that a given node is not drained.
779 @param lu: the LU on behalf of which we make the check
780 @param node: the node to check
781 @raise errors.OpPrereqError: if the node is drained
784 if lu.cfg.GetNodeInfo(node).drained:
785 raise errors.OpPrereqError("Can't use drained node %s" % node,
789 def _CheckNodeHasOS(lu, node, os_name, force_variant):
790 """Ensure that a node supports a given OS.
792 @param lu: the LU on behalf of which we make the check
793 @param node: the node to check
794 @param os_name: the OS to query about
795 @param force_variant: whether to ignore variant errors
796 @raise errors.OpPrereqError: if the node does not support the OS
799 result = lu.rpc.call_os_get(node, os_name)
800 result.Raise("OS '%s' not in supported OS list for node %s" %
802 prereq=True, ecode=errors.ECODE_INVAL)
803 if not force_variant:
804 _CheckOSVariant(result.payload, os_name)
807 def _RequireFileStorage():
808 """Checks that file storage is enabled.
810 @raise errors.OpPrereqError: when file storage is disabled
813 if not constants.ENABLE_FILE_STORAGE:
814 raise errors.OpPrereqError("File storage disabled at configure time",
818 def _CheckDiskTemplate(template):
819 """Ensure a given disk template is valid.
822 if template not in constants.DISK_TEMPLATES:
823 msg = ("Invalid disk template name '%s', valid templates are: %s" %
824 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
826 if template == constants.DT_FILE:
827 _RequireFileStorage()
831 def _CheckStorageType(storage_type):
832 """Ensure a given storage type is valid.
835 if storage_type not in constants.VALID_STORAGE_TYPES:
836 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
838 if storage_type == constants.ST_FILE:
839 _RequireFileStorage()
843 def _GetClusterDomainSecret():
844 """Reads the cluster domain secret.
847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
851 def _CheckInstanceDown(lu, instance, reason):
852 """Ensure that an instance is not running."""
853 if instance.admin_up:
854 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
855 (instance.name, reason), errors.ECODE_STATE)
857 pnode = instance.primary_node
858 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
859 ins_l.Raise("Can't contact node %s for instance information" % pnode,
860 prereq=True, ecode=errors.ECODE_ENVIRON)
862 if instance.name in ins_l.payload:
863 raise errors.OpPrereqError("Instance %s is running, %s" %
864 (instance.name, reason), errors.ECODE_STATE)
867 def _ExpandItemName(fn, name, kind):
868 """Expand an item name.
870 @param fn: the function to use for expansion
871 @param name: requested item name
872 @param kind: text description ('Node' or 'Instance')
873 @return: the resolved (full) name
874 @raise errors.OpPrereqError: if the item is not found
878 if full_name is None:
879 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
884 def _ExpandNodeName(cfg, name):
885 """Wrapper over L{_ExpandItemName} for nodes."""
886 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
889 def _ExpandInstanceName(cfg, name):
890 """Wrapper over L{_ExpandItemName} for instance."""
891 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
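# Illustrative example (hypothetical names): the expansion wrappers turn a
# short name into the fully-qualified name stored in the configuration:
#
#   self.op.node_name = _ExpandNodeName(self.cfg, "node1")
#   # -> "node1.example.com", or OpPrereqError if no such node is known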
894 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
895 memory, vcpus, nics, disk_template, disks,
896 bep, hvp, hypervisor_name):
897 """Builds instance related env variables for hooks
899 This builds the hook environment from individual variables.
902 @param name: the name of the instance
903 @type primary_node: string
904 @param primary_node: the name of the instance's primary node
905 @type secondary_nodes: list
906 @param secondary_nodes: list of secondary nodes as strings
907 @type os_type: string
908 @param os_type: the name of the instance's OS
909 @type status: boolean
910 @param status: the should_run status of the instance
912 @param memory: the memory size of the instance
914 @param vcpus: the count of VCPUs the instance has
916 @param nics: list of tuples (ip, mac, mode, link) representing
917 the NICs the instance has
918 @type disk_template: string
919 @param disk_template: the disk template of the instance
921 @param disks: the list of (size, mode) pairs
923 @param bep: the backend parameters for the instance
925 @param hvp: the hypervisor parameters for the instance
926 @type hypervisor_name: string
927 @param hypervisor_name: the hypervisor for the instance
929 @return: the hook environment for this instance
938 "INSTANCE_NAME": name,
939 "INSTANCE_PRIMARY": primary_node,
940 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
941 "INSTANCE_OS_TYPE": os_type,
942 "INSTANCE_STATUS": str_status,
943 "INSTANCE_MEMORY": memory,
944 "INSTANCE_VCPUS": vcpus,
945 "INSTANCE_DISK_TEMPLATE": disk_template,
946 "INSTANCE_HYPERVISOR": hypervisor_name,
950 nic_count = len(nics)
951 for idx, (ip, mac, mode, link) in enumerate(nics):
954 env["INSTANCE_NIC%d_IP" % idx] = ip
955 env["INSTANCE_NIC%d_MAC" % idx] = mac
956 env["INSTANCE_NIC%d_MODE" % idx] = mode
957 env["INSTANCE_NIC%d_LINK" % idx] = link
958 if mode == constants.NIC_MODE_BRIDGED:
959 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
963 env["INSTANCE_NIC_COUNT"] = nic_count
966 disk_count = len(disks)
967 for idx, (size, mode) in enumerate(disks):
968 env["INSTANCE_DISK%d_SIZE" % idx] = size
969 env["INSTANCE_DISK%d_MODE" % idx] = mode
973 env["INSTANCE_DISK_COUNT"] = disk_count
975 for source, kind in [(bep, "BE"), (hvp, "HV")]:
976 for key, value in source.items():
977 env["INSTANCE_%s_%s" % (kind, key)] = value
982 def _NICListToTuple(lu, nics):
983 """Build a list of nic information tuples.
985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986 value in LUQueryInstanceData.
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type nics: list of L{objects.NIC}
991 @param nics: list of nics to convert to hooks tuples
995 cluster = lu.cfg.GetClusterInfo()
999 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000 mode = filled_params[constants.NIC_MODE]
1001 link = filled_params[constants.NIC_LINK]
1002 hooks_nics.append((ip, mac, mode, link))
1006 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007 """Builds instance related env variables for hooks from an object.
1009 @type lu: L{LogicalUnit}
1010 @param lu: the logical unit on whose behalf we execute
1011 @type instance: L{objects.Instance}
1012 @param instance: the instance for which we should build the
1014 @type override: dict
1015 @param override: dictionary with key/values that will override
1018 @return: the hook environment dictionary
1021 cluster = lu.cfg.GetClusterInfo()
1022 bep = cluster.FillBE(instance)
1023 hvp = cluster.FillHV(instance)
1025 'name': instance.name,
1026 'primary_node': instance.primary_node,
1027 'secondary_nodes': instance.secondary_nodes,
1028 'os_type': instance.os,
1029 'status': instance.admin_up,
1030 'memory': bep[constants.BE_MEMORY],
1031 'vcpus': bep[constants.BE_VCPUS],
1032 'nics': _NICListToTuple(lu, instance.nics),
1033 'disk_template': instance.disk_template,
1034 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1037 'hypervisor_name': instance.hypervisor,
1040 args.update(override)
1041 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1044 def _AdjustCandidatePool(lu, exceptions):
1045 """Adjust the candidate pool after node operations.
1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1050 lu.LogInfo("Promoted nodes to master candidate role: %s",
1051 utils.CommaJoin(node.name for node in mod_list))
1052 for name in mod_list:
1053 lu.context.ReaddNode(name)
1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1056 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1060 def _DecideSelfPromotion(lu, exceptions=None):
1061 """Decide whether I should promote myself as a master candidate.
1064 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066 # the new node will increase mc_max by one, so:
1067 mc_should = min(mc_should + 1, cp_size)
1068 return mc_now < mc_should
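# Illustrative example (numbers invented): with candidate_pool_size = 10,
# mc_now = 3 and mc_should = 3, adding this node gives
# mc_should = min(3 + 1, 10) = 4, so mc_now < mc_should and the new node
# promotes itself to master candidate.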
1071 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072 """Check that the brigdes needed by a list of nics exist.
1075 cluster = lu.cfg.GetClusterInfo()
1076 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077 brlist = [params[constants.NIC_LINK] for params in paramslist
1078 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1080 result = lu.rpc.call_bridges_exist(target_node, brlist)
1081 result.Raise("Error checking bridges on destination node '%s'" %
1082 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1085 def _CheckInstanceBridgesExist(lu, instance, node=None):
1086 """Check that the brigdes needed by an instance exist.
1090 node = instance.primary_node
1091 _CheckNicsBridgesExist(lu, instance.nics, node)
1094 def _CheckOSVariant(os_obj, name):
1095 """Check whether an OS name conforms to the os variants specification.
1097 @type os_obj: L{objects.OS}
1098 @param os_obj: OS object to check
1100 @param name: OS name passed by the user, to check for validity
1103 if not os_obj.supported_variants:
1105 variant = objects.OS.GetVariant(name)
1107 raise errors.OpPrereqError("OS name must include a variant",
1110 if variant not in os_obj.supported_variants:
1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1114 def _GetNodeInstancesInner(cfg, fn):
1115 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1118 def _GetNodeInstances(cfg, node_name):
1119 """Returns a list of all primary and secondary instances on a node.
1123 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1126 def _GetNodePrimaryInstances(cfg, node_name):
1127 """Returns primary instances on a node.
1130 return _GetNodeInstancesInner(cfg,
1131 lambda inst: node_name == inst.primary_node)
1134 def _GetNodeSecondaryInstances(cfg, node_name):
1135 """Returns secondary instances on a node.
1138 return _GetNodeInstancesInner(cfg,
1139 lambda inst: node_name in inst.secondary_nodes)
1142 def _GetStorageTypeArgs(cfg, storage_type):
1143 """Returns the arguments for a storage type.
1146 # Special case for file storage
1147 if storage_type == constants.ST_FILE:
1148 # storage.FileStorage wants a list of storage directories
1149 return [[cfg.GetFileStorageDir()]]
1154 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1157 for dev in instance.disks:
1158 cfg.SetDiskID(dev, node_name)
1160 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161 result.Raise("Failed to get disk status from node %s" % node_name,
1162 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164 for idx, bdev_status in enumerate(result.payload):
1165 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1171 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172 """Check the sanity of iallocator and node arguments and use the
1173 cluster-wide iallocator if appropriate.
1175 Check that at most one of (iallocator, node) is specified. If none is
1176 specified, then the LU's opcode's iallocator slot is filled with the
1177 cluster-wide default iallocator.
1179 @type iallocator_slot: string
1180 @param iallocator_slot: the name of the opcode iallocator slot
1181 @type node_slot: string
1182 @param node_slot: the name of the opcode target node slot
1185 node = getattr(lu.op, node_slot, None)
1186 iallocator = getattr(lu.op, iallocator_slot, None)
1188 if node is not None and iallocator is not None:
1189 raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
1191 elif node is None and iallocator is None:
1192 default_iallocator = lu.cfg.GetDefaultIAllocator()
1193 if default_iallocator:
1194 setattr(lu.op, iallocator_slot, default_iallocator)
1196 raise errors.OpPrereqError("No iallocator or node given and no"
1197 " cluster-wide default iallocator found."
1198 " Please specify either an iallocator or a"
1199 " node, or set a cluster-wide default"
1203 class LUPostInitCluster(LogicalUnit):
1204 """Logical unit for running hooks after cluster initialization.
1207 HPATH = "cluster-init"
1208 HTYPE = constants.HTYPE_CLUSTER
1210 def BuildHooksEnv(self):
1214 env = {"OP_TARGET": self.cfg.GetClusterName()}
1215 mn = self.cfg.GetMasterNode()
1216 return env, [], [mn]
1218 def Exec(self, feedback_fn):
1225 class LUDestroyCluster(LogicalUnit):
1226 """Logical unit for destroying the cluster.
1229 HPATH = "cluster-destroy"
1230 HTYPE = constants.HTYPE_CLUSTER
1232 def BuildHooksEnv(self):
1236 env = {"OP_TARGET": self.cfg.GetClusterName()}
1239 def CheckPrereq(self):
1240 """Check prerequisites.
1242 This checks whether the cluster is empty.
1244 Any errors are signaled by raising errors.OpPrereqError.
1247 master = self.cfg.GetMasterNode()
1249 nodelist = self.cfg.GetNodeList()
1250 if len(nodelist) != 1 or nodelist[0] != master:
1251 raise errors.OpPrereqError("There are still %d node(s) in"
1252 " this cluster." % (len(nodelist) - 1),
1254 instancelist = self.cfg.GetInstanceList()
1256 raise errors.OpPrereqError("There are still %d instance(s) in"
1257 " this cluster." % len(instancelist),
1260 def Exec(self, feedback_fn):
1261 """Destroys the cluster.
1264 master = self.cfg.GetMasterNode()
1265 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1267 # Run post hooks on master node before it's removed
1268 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1270 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1272 # pylint: disable-msg=W0702
1273 self.LogWarning("Errors occurred running hooks on %s" % master)
1275 result = self.rpc.call_node_stop_master(master, False)
1276 result.Raise("Could not disable the master role")
1278 if modify_ssh_setup:
1279 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1280 utils.CreateBackup(priv_key)
1281 utils.CreateBackup(pub_key)
1286 def _VerifyCertificate(filename):
1287 """Verifies a certificate for LUVerifyCluster.
1289 @type filename: string
1290 @param filename: Path to PEM file
1294 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1295 utils.ReadFile(filename))
1296 except Exception, err: # pylint: disable-msg=W0703
1297 return (LUVerifyCluster.ETYPE_ERROR,
1298 "Failed to load X509 certificate %s: %s" % (filename, err))
1301 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1302 constants.SSL_CERT_EXPIRATION_ERROR)
1305 fnamemsg = "While verifying %s: %s" % (filename, msg)
1310 return (None, fnamemsg)
1311 elif errcode == utils.CERT_WARNING:
1312 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1313 elif errcode == utils.CERT_ERROR:
1314 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1316 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1319 class LUVerifyCluster(LogicalUnit):
1320 """Verifies the cluster status.
1323 HPATH = "cluster-verify"
1324 HTYPE = constants.HTYPE_CLUSTER
1326 ("skip_checks", _EmptyList,
1327 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1328 ("verbose", False, _TBool),
1329 ("error_codes", False, _TBool),
1330 ("debug_simulate_errors", False, _TBool),
1334 TCLUSTER = "cluster"
1336 TINSTANCE = "instance"
1338 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1339 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1340 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1341 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1342 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1343 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1345 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1346 ENODEDRBD = (TNODE, "ENODEDRBD")
1347 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1348 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1349 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1350 ENODEHV = (TNODE, "ENODEHV")
1351 ENODELVM = (TNODE, "ENODELVM")
1352 ENODEN1 = (TNODE, "ENODEN1")
1353 ENODENET = (TNODE, "ENODENET")
1354 ENODEOS = (TNODE, "ENODEOS")
1355 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1356 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1357 ENODERPC = (TNODE, "ENODERPC")
1358 ENODESSH = (TNODE, "ENODESSH")
1359 ENODEVERSION = (TNODE, "ENODEVERSION")
1360 ENODESETUP = (TNODE, "ENODESETUP")
1361 ENODETIME = (TNODE, "ENODETIME")
1363 ETYPE_FIELD = "code"
1364 ETYPE_ERROR = "ERROR"
1365 ETYPE_WARNING = "WARNING"
1367 class NodeImage(object):
1368 """A class representing the logical and physical status of a node.
1371 @ivar name: the node name to which this object refers
1372 @ivar volumes: a structure as returned from
1373 L{ganeti.backend.GetVolumeList} (runtime)
1374 @ivar instances: a list of running instances (runtime)
1375 @ivar pinst: list of configured primary instances (config)
1376 @ivar sinst: list of configured secondary instances (config)
1377 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1378 of this node (config)
1379 @ivar mfree: free memory, as reported by hypervisor (runtime)
1380 @ivar dfree: free disk, as reported by the node (runtime)
1381 @ivar offline: the offline status (config)
1382 @type rpc_fail: boolean
1383 @ivar rpc_fail: whether the RPC verify call failed (overall,
1384 not whether the individual keys were correct) (runtime)
1385 @type lvm_fail: boolean
1386 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1387 @type hyp_fail: boolean
1388 @ivar hyp_fail: whether the RPC call didn't return the instance list
1389 @type ghost: boolean
1390 @ivar ghost: whether this is a known node or not (config)
1391 @type os_fail: boolean
1392 @ivar os_fail: whether the RPC call didn't return valid OS data
1394 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1397 def __init__(self, offline=False, name=None):
1406 self.offline = offline
1407 self.rpc_fail = False
1408 self.lvm_fail = False
1409 self.hyp_fail = False
1411 self.os_fail = False
1414 def ExpandNames(self):
1415 self.needed_locks = {
1416 locking.LEVEL_NODE: locking.ALL_SET,
1417 locking.LEVEL_INSTANCE: locking.ALL_SET,
1419 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1421 def _Error(self, ecode, item, msg, *args, **kwargs):
1422 """Format an error message.
1424 Based on the opcode's error_codes parameter, either format a
1425 parseable error code, or a simpler error string.
1427 This must be called only from Exec and functions called from Exec.
1430 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1432 # first complete the msg
1435 # then format the whole message
1436 if self.op.error_codes:
1437 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1443 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1444 # and finally report it via the feedback_fn
1445 self._feedback_fn(" - %s" % msg)
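    # Illustrative example (invented values, not from the original module):
    # with error_codes enabled a report line looks like
    #   ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
    # whereas the plain format reads
    #   ERROR: node node1.example.com: unable to check volume groups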
1447 def _ErrorIf(self, cond, *args, **kwargs):
1448 """Log an error message if the passed condition is True.
1451 cond = bool(cond) or self.op.debug_simulate_errors
1453 self._Error(*args, **kwargs)
1454 # do not mark the operation as failed for WARN cases only
1455 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1456 self.bad = self.bad or cond
1458 def _VerifyNode(self, ninfo, nresult):
1459 """Perform some basic validation on data returned from a node.
1461 - check the result data structure is well formed and has all the
1463 - check ganeti version
1465 @type ninfo: L{objects.Node}
1466 @param ninfo: the node to check
1467 @param nresult: the results from the node
1469 @return: whether overall this call was successful (and we can expect
1470 reasonable values in the response)
1474 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1476 # main result, nresult should be a non-empty dict
1477 test = not nresult or not isinstance(nresult, dict)
1478 _ErrorIf(test, self.ENODERPC, node,
1479 "unable to verify node: no data returned")
1483 # compares ganeti version
1484 local_version = constants.PROTOCOL_VERSION
1485 remote_version = nresult.get("version", None)
1486 test = not (remote_version and
1487 isinstance(remote_version, (list, tuple)) and
1488 len(remote_version) == 2)
1489 _ErrorIf(test, self.ENODERPC, node,
1490 "connection to node returned invalid data")
1494 test = local_version != remote_version[0]
1495 _ErrorIf(test, self.ENODEVERSION, node,
1496 "incompatible protocol versions: master %s,"
1497 " node %s", local_version, remote_version[0])
1501 # node seems compatible, we can actually try to look into its results
1503 # full package version
1504 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1505 self.ENODEVERSION, node,
1506 "software version mismatch: master %s, node %s",
1507 constants.RELEASE_VERSION, remote_version[1],
1508 code=self.ETYPE_WARNING)
1510 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1511 if isinstance(hyp_result, dict):
1512 for hv_name, hv_result in hyp_result.iteritems():
1513 test = hv_result is not None
1514 _ErrorIf(test, self.ENODEHV, node,
1515 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1518 test = nresult.get(constants.NV_NODESETUP,
1519 ["Missing NODESETUP results"])
1520 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1525 def _VerifyNodeTime(self, ninfo, nresult,
1526 nvinfo_starttime, nvinfo_endtime):
1527 """Check the node time.
1529 @type ninfo: L{objects.Node}
1530 @param ninfo: the node to check
1531 @param nresult: the remote results for the node
1532 @param nvinfo_starttime: the start time of the RPC call
1533 @param nvinfo_endtime: the end time of the RPC call
1537 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1539 ntime = nresult.get(constants.NV_TIME, None)
1541 ntime_merged = utils.MergeTime(ntime)
1542 except (ValueError, TypeError):
1543 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1546 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1547 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1548 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1549 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1553 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1554 "Node time diverges by at least %s from master node time",
1557 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1558 """Check the node time.
1560 @type ninfo: L{objects.Node}
1561 @param ninfo: the node to check
1562 @param nresult: the remote results for the node
1563 @param vg_name: the configured VG name
1570 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1572 # checks vg existence and size > 20G
1573 vglist = nresult.get(constants.NV_VGLIST, None)
1575 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1577 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1578 constants.MIN_VG_SIZE)
1579 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1582 pvlist = nresult.get(constants.NV_PVLIST, None)
1583 test = pvlist is None
1584 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1586 # check that ':' is not present in PV names, since it's a
1587 # special character for lvcreate (denotes the range of PEs to
1589 for _, pvname, owner_vg in pvlist:
1590 test = ":" in pvname
1591 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1592 " '%s' of VG '%s'", pvname, owner_vg)
1594 def _VerifyNodeNetwork(self, ninfo, nresult):
1595 """Check the node time.
1597 @type ninfo: L{objects.Node}
1598 @param ninfo: the node to check
1599 @param nresult: the remote results for the node
1603 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1605 test = constants.NV_NODELIST not in nresult
1606 _ErrorIf(test, self.ENODESSH, node,
1607 "node hasn't returned node ssh connectivity data")
1609 if nresult[constants.NV_NODELIST]:
1610 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1611 _ErrorIf(True, self.ENODESSH, node,
1612 "ssh communication with node '%s': %s", a_node, a_msg)
1614 test = constants.NV_NODENETTEST not in nresult
1615 _ErrorIf(test, self.ENODENET, node,
1616 "node hasn't returned node tcp connectivity data")
1618 if nresult[constants.NV_NODENETTEST]:
1619 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1621 _ErrorIf(True, self.ENODENET, node,
1622 "tcp communication with node '%s': %s",
1623 anode, nresult[constants.NV_NODENETTEST][anode])
1625 test = constants.NV_MASTERIP not in nresult
1626 _ErrorIf(test, self.ENODENET, node,
1627 "node hasn't returned node master IP reachability data")
1629 if not nresult[constants.NV_MASTERIP]:
1630 if node == self.master_node:
1631 msg = "the master node cannot reach the master IP (not configured?)"
1633 msg = "cannot reach the master IP"
1634 _ErrorIf(True, self.ENODENET, node, msg)
1637 def _VerifyInstance(self, instance, instanceconfig, node_image):
1638 """Verify an instance.
1640 This function checks to see if the required block devices are
1641 available on the instance's node.
1644 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1645 node_current = instanceconfig.primary_node
1647 node_vol_should = {}
1648 instanceconfig.MapLVsByNode(node_vol_should)
1650 for node in node_vol_should:
1651 n_img = node_image[node]
1652 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1653 # ignore missing volumes on offline or broken nodes
1655 for volume in node_vol_should[node]:
1656 test = volume not in n_img.volumes
1657 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1658 "volume %s missing on node %s", volume, node)
1660 if instanceconfig.admin_up:
1661 pri_img = node_image[node_current]
1662 test = instance not in pri_img.instances and not pri_img.offline
1663 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1664 "instance not running on its primary node %s",
1667 for node, n_img in node_image.items():
1668 if node != node_current:
1669 test = instance in n_img.instances
1670 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1671 "instance should not run on node %s", node)
1673 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1674 """Verify if there are any unknown volumes in the cluster.
1676 The .os, .swap and backup volumes are ignored. All other volumes are
1677 reported as unknown.
1679 @type reserved: L{ganeti.utils.FieldSet}
1680 @param reserved: a FieldSet of reserved volume names
1683 for node, n_img in node_image.items():
1684 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1685 # skip non-healthy nodes
1687 for volume in n_img.volumes:
1688 test = ((node not in node_vol_should or
1689 volume not in node_vol_should[node]) and
1690 not reserved.Matches(volume))
1691 self._ErrorIf(test, self.ENODEORPHANLV, node,
1692 "volume %s is unknown", volume)
1694 def _VerifyOrphanInstances(self, instancelist, node_image):
1695 """Verify the list of running instances.
1697 This checks what instances are running but unknown to the cluster.
1700 for node, n_img in node_image.items():
1701 for o_inst in n_img.instances:
1702 test = o_inst not in instancelist
1703 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1704 "instance %s on node %s should not exist", o_inst, node)
1706 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1707 """Verify N+1 Memory Resilience.
1709 Check that if one single node dies we can still start all the
1710 instances it was primary for.
1713 for node, n_img in node_image.items():
1714 # This code checks that every node which is now listed as
1715 # secondary has enough memory to host all instances it is
1716 # supposed to host, should a single other node in the cluster fail.
1717 # FIXME: not ready for failover to an arbitrary node
1718 # FIXME: does not support file-backed instances
1719 # WARNING: we currently take into account down instances as well
1720 # as up ones, considering that even if they're down someone
1721 # might want to start them even in the event of a node failure.
1722 for prinode, instances in n_img.sbp.items():
1724 for instance in instances:
1725 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1726 if bep[constants.BE_AUTO_BALANCE]:
1727 needed_mem += bep[constants.BE_MEMORY]
1728 test = n_img.mfree < needed_mem
1729 self._ErrorIf(test, self.ENODEN1, node,
1730 "not enough memory on to accommodate"
1731 " failovers should peer node %s fail", prinode)
1733 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1735 """Verifies and computes the node required file checksums.
1737 @type ninfo: L{objects.Node}
1738 @param ninfo: the node to check
1739 @param nresult: the remote results for the node
1740 @param file_list: required list of files
1741 @param local_cksum: dictionary of local files and their checksums
1742 @param master_files: list of files that only masters should have
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1749 test = not isinstance(remote_cksum, dict)
1750 _ErrorIf(test, self.ENODEFILECHECK, node,
1751 "node hasn't returned file checksum data")
1755 for file_name in file_list:
1756 node_is_mc = ninfo.master_candidate
1757 must_have = (file_name not in master_files) or node_is_mc
1759 test1 = file_name not in remote_cksum
1761 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1763 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
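      # Descriptive note (added for clarity): test1 means the file is missing
      # on the node, test2 means it is present but its checksum differs from
      # the master's, and test3 means it is present with the correct checksum;
      # whether each case is an error depends on must_have.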
1764 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1765 "file '%s' missing", file_name)
1766 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1767 "file '%s' has wrong checksum", file_name)
1768 # not candidate and this is not a must-have file
1769 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1770 "file '%s' should not exist on non master"
1771 " candidates (and the file is outdated)", file_name)
1772 # all good, except non-master/non-must have combination
1773 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1774 "file '%s' should not exist"
1775 " on non master candidates", file_name)
1777 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1779 """Verifies and the node DRBD status.
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the remote results for the node
1784 @param instanceinfo: the dict of instances
1785 @param drbd_helper: the configured DRBD usermode helper
1786 @param drbd_map: the DRBD map as returned by
1787 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1791 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1794 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1795 test = (helper_result is None)
1796 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1797 "no drbd usermode helper returned")
1799 status, payload = helper_result
1801 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1802 "drbd usermode helper check unsuccessful: %s", payload)
1803 test = status and (payload != drbd_helper)
1804 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1805 "wrong drbd usermode helper: %s", payload)
1807 # compute the DRBD minors
1809 for minor, instance in drbd_map[node].items():
1810 test = instance not in instanceinfo
1811 _ErrorIf(test, self.ECLUSTERCFG, None,
1812 "ghost instance '%s' in temporary DRBD map", instance)
1813 # ghost instance should not be running, but otherwise we
1814 # don't give double warnings (both ghost instance and
1815 # unallocated minor in use)
1817 node_drbd[minor] = (instance, False)
1819 instance = instanceinfo[instance]
1820 node_drbd[minor] = (instance.name, instance.admin_up)
1822 # and now check them
1823 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1824 test = not isinstance(used_minors, (tuple, list))
1825 _ErrorIf(test, self.ENODEDRBD, node,
1826 "cannot parse drbd status file: %s", str(used_minors))
1828 # we cannot check drbd status
1831 for minor, (iname, must_exist) in node_drbd.items():
1832 test = minor not in used_minors and must_exist
1833 _ErrorIf(test, self.ENODEDRBD, node,
1834 "drbd minor %d of instance %s is not active", minor, iname)
1835 for minor in used_minors:
1836 test = minor not in node_drbd
1837 _ErrorIf(test, self.ENODEDRBD, node,
1838 "unallocated drbd minor %d is in use", minor)
1840 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1841 """Builds the node OS structures.
1843 @type ninfo: L{objects.Node}
1844 @param ninfo: the node to check
1845 @param nresult: the remote results for the node
1846 @param nimg: the node image object
1850 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1852 remote_os = nresult.get(constants.NV_OSLIST, None)
1853 test = (not isinstance(remote_os, list) or
1854 not compat.all(isinstance(v, list) and len(v) == 7
1855 for v in remote_os))
1857 _ErrorIf(test, self.ENODEOS, node,
1858 "node hasn't returned valid OS data")
1867 for (name, os_path, status, diagnose,
1868 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1870 if name not in os_dict:
1873 # parameters is a list of lists instead of list of tuples due to
1874 # JSON lacking a real tuple type, fix it:
1875 parameters = [tuple(v) for v in parameters]
1876 os_dict[name].append((os_path, status, diagnose,
1877 set(variants), set(parameters), set(api_ver)))
1879 nimg.oslist = os_dict
1881 def _VerifyNodeOS(self, ninfo, nimg, base):
1882 """Verifies the node OS list.
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nimg: the node image object
1887 @param base: the 'template' node we match against (e.g. from the master)
1891 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1893 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1895 for os_name, os_data in nimg.oslist.items():
1896 assert os_data, "Empty OS status for OS %s?!" % os_name
1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898 _ErrorIf(not f_status, self.ENODEOS, node,
1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901 "OS '%s' has multiple entries (first one shadows the rest): %s",
1902 os_name, utils.CommaJoin([v[0] for v in os_data]))
1903 # this will be caught in the backend too
1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905 and not f_var, self.ENODEOS, node,
1906 "OS %s with API at least %d does not declare any variant",
1907 os_name, constants.OS_API_V15)
1908 # comparisons with the 'base' image
1909 test = os_name not in base.oslist
1910 _ErrorIf(test, self.ENODEOS, node,
1911 "Extra OS %s not present on reference node (%s)",
1915 assert base.oslist[os_name], "Base node has empty OS status?"
1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1918 # base OS is invalid, skipping
1920 for kind, a, b in [("API version", f_api, b_api),
1921 ("variants list", f_var, b_var),
1922 ("parameters", f_param, b_param)]:
1923 _ErrorIf(a != b, self.ENODEOS, node,
1924 "OS %s %s differs from reference node %s: %s vs. %s",
1925 kind, os_name, base.name,
1926 utils.CommaJoin(a), utils.CommaJoin(b))
1928 # check any missing OSes
1929 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1930 _ErrorIf(missing, self.ENODEOS, node,
1931 "OSes present on reference node %s but missing on this node: %s",
1932 base.name, utils.CommaJoin(missing))
1934 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1935 """Verifies and updates the node volume data.
1937 This function will update a L{NodeImage}'s internal structures
1938 with data from the remote call.
1940 @type ninfo: L{objects.Node}
1941 @param ninfo: the node to check
1942 @param nresult: the remote results for the node
1943 @param nimg: the node image object
1944 @param vg_name: the configured VG name
1948 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1950 nimg.lvm_fail = True
1951 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1954 elif isinstance(lvdata, basestring):
1955 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1956 utils.SafeEncode(lvdata))
1957 elif not isinstance(lvdata, dict):
1958 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1960 nimg.volumes = lvdata
1961 nimg.lvm_fail = False
1963 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1964 """Verifies and updates the node instance list.
1966 If the listing was successful, then updates this node's instance
1967 list. Otherwise, it marks the RPC call as failed for the instance list.
1970 @type ninfo: L{objects.Node}
1971 @param ninfo: the node to check
1972 @param nresult: the remote results for the node
1973 @param nimg: the node image object
1976 idata = nresult.get(constants.NV_INSTANCELIST, None)
1977 test = not isinstance(idata, list)
1978 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1979 " (instancelist): %s", utils.SafeEncode(str(idata)))
1981 nimg.hyp_fail = True
1983 nimg.instances = idata
1985 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1986 """Verifies and computes a node information map
1988 @type ninfo: L{objects.Node}
1989 @param ninfo: the node to check
1990 @param nresult: the remote results for the node
1991 @param nimg: the node image object
1992 @param vg_name: the configured VG name
1996 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1998 # try to read free memory (from the hypervisor)
1999 hv_info = nresult.get(constants.NV_HVINFO, None)
2000 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2001 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2004 nimg.mfree = int(hv_info["memory_free"])
2005 except (ValueError, TypeError):
2006 _ErrorIf(True, self.ENODERPC, node,
2007 "node returned invalid nodeinfo, check hypervisor")
2009 # FIXME: devise a free space model for file based instances as well
2010 if vg_name is not None:
2011 test = (constants.NV_VGLIST not in nresult or
2012 vg_name not in nresult[constants.NV_VGLIST])
2013 _ErrorIf(test, self.ENODELVM, node,
2014 "node didn't return data for the volume group '%s'"
2015 " - it is either missing or broken", vg_name)
2018 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2019 except (ValueError, TypeError):
2020 _ErrorIf(True, self.ENODERPC, node,
2021 "node returned invalid LVM info, check LVM status")
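# Minimal sketch of the defensive parsing above (comments only, hypothetical
# payloads):
#   hv_info = {"memory_free": "1024"}      -> int("1024") == 1024, mfree set
#   hv_info = {"memory_free": None}        -> TypeError, reported as ENODERPC
#   nresult[NV_VGLIST] == {"xenvg": "n/a"} -> ValueError, reported as ENODERPC
# Everything coming back from the node is treated as untrusted and converted
# explicitly before being stored in the node image.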
2023 def BuildHooksEnv(self):
2026 Cluster-Verify hooks are run only in the post phase; if they fail, their
2027 output is logged in the verify output and the verification fails.
2030 all_nodes = self.cfg.GetNodeList()
2032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2034 for node in self.cfg.GetAllNodesInfo().values():
2035 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2037 return env, [], all_nodes
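# Hedged example of the hooks environment built above (comments only,
# hypothetical tags): the empty list in the return value means no node runs
# the pre phase, while every node runs the post phase.
#   env = {
#     "CLUSTER_TAGS": "prod critical",
#     "NODE_TAGS_node1.example.com": "rack1",
#     "NODE_TAGS_node2.example.com": "",
#   }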
2039 def Exec(self, feedback_fn):
2040 """Verify integrity of cluster, performing various tests on nodes.
2044 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2045 verbose = self.op.verbose
2046 self._feedback_fn = feedback_fn
2047 feedback_fn("* Verifying global settings")
2048 for msg in self.cfg.VerifyConfig():
2049 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2051 # Check the cluster certificates
2052 for cert_filename in constants.ALL_CERT_FILES:
2053 (errcode, msg) = _VerifyCertificate(cert_filename)
2054 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2056 vg_name = self.cfg.GetVGName()
2057 drbd_helper = self.cfg.GetDRBDHelper()
2058 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2059 cluster = self.cfg.GetClusterInfo()
2060 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2061 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2062 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2063 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2064 for iname in instancelist)
2065 i_non_redundant = [] # Non redundant instances
2066 i_non_a_balanced = [] # Non auto-balanced instances
2067 n_offline = 0 # Count of offline nodes
2068 n_drained = 0 # Count of nodes being drained
2069 node_vol_should = {}
2071 # FIXME: verify OS list
2072 # do local checksums
2073 master_files = [constants.CLUSTER_CONF_FILE]
2074 master_node = self.master_node = self.cfg.GetMasterNode()
2075 master_ip = self.cfg.GetMasterIP()
2077 file_names = ssconf.SimpleStore().GetFileList()
2078 file_names.extend(constants.ALL_CERT_FILES)
2079 file_names.extend(master_files)
2080 if cluster.modify_etc_hosts:
2081 file_names.append(constants.ETC_HOSTS)
2083 local_checksums = utils.FingerprintFiles(file_names)
2085 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2086 node_verify_param = {
2087 constants.NV_FILELIST: file_names,
2088 constants.NV_NODELIST: [node.name for node in nodeinfo
2089 if not node.offline],
2090 constants.NV_HYPERVISOR: hypervisors,
2091 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2092 node.secondary_ip) for node in nodeinfo
2093 if not node.offline],
2094 constants.NV_INSTANCELIST: hypervisors,
2095 constants.NV_VERSION: None,
2096 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2097 constants.NV_NODESETUP: None,
2098 constants.NV_TIME: None,
2099 constants.NV_MASTERIP: (master_node, master_ip),
2100 constants.NV_OSLIST: None,
2103 if vg_name is not None:
2104 node_verify_param[constants.NV_VGLIST] = None
2105 node_verify_param[constants.NV_LVLIST] = vg_name
2106 node_verify_param[constants.NV_PVLIST] = [vg_name]
2107 node_verify_param[constants.NV_DRBDLIST] = None
2110 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2112 # Build our expected cluster state
2113 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2115 for node in nodeinfo)
2117 for instance in instancelist:
2118 inst_config = instanceinfo[instance]
2120 for nname in inst_config.all_nodes:
2121 if nname not in node_image:
2123 gnode = self.NodeImage(name=nname)
2125 node_image[nname] = gnode
2127 inst_config.MapLVsByNode(node_vol_should)
2129 pnode = inst_config.primary_node
2130 node_image[pnode].pinst.append(instance)
2132 for snode in inst_config.secondary_nodes:
2133 nimg = node_image[snode]
2134 nimg.sinst.append(instance)
2135 if pnode not in nimg.sbp:
2136 nimg.sbp[pnode] = []
2137 nimg.sbp[pnode].append(instance)
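# Illustrative shape of the per-node bookkeeping built above (comments only,
# hypothetical names): for an instance "inst1" with primary node1 and a
# single secondary node2,
#   node_image["node1"].pinst == ["inst1"]
#   node_image["node2"].sinst == ["inst1"]
#   node_image["node2"].sbp   == {"node1": ["inst1"]}  # secondaries by primary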
2139 # At this point, we have the in-memory data structures complete,
2140 # except for the runtime information, which we'll gather next
2142 # Due to the way our RPC system works, exact response times cannot be
2143 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2144 time before and after executing the request, we can at least have a time window.
2146 nvinfo_starttime = time.time()
2147 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2148 self.cfg.GetClusterName())
2149 nvinfo_endtime = time.time()
2151 all_drbd_map = self.cfg.ComputeDRBDMap()
2153 feedback_fn("* Verifying node status")
2157 for node_i in nodeinfo:
2159 nimg = node_image[node]
2163 feedback_fn("* Skipping offline node %s" % (node,))
2167 if node == master_node:
2169 elif node_i.master_candidate:
2170 ntype = "master candidate"
2171 elif node_i.drained:
2177 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2179 msg = all_nvinfo[node].fail_msg
2180 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2182 nimg.rpc_fail = True
2185 nresult = all_nvinfo[node].payload
2187 nimg.call_ok = self._VerifyNode(node_i, nresult)
2188 self._VerifyNodeNetwork(node_i, nresult)
2189 self._VerifyNodeLVM(node_i, nresult, vg_name)
2190 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2192 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2196 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2197 self._UpdateNodeInstances(node_i, nresult, nimg)
2198 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2199 self._UpdateNodeOS(node_i, nresult, nimg)
2200 if not nimg.os_fail:
2201 if refos_img is None:
2203 self._VerifyNodeOS(node_i, nimg, refos_img)
2205 feedback_fn("* Verifying instance status")
2206 for instance in instancelist:
2208 feedback_fn("* Verifying instance %s" % instance)
2209 inst_config = instanceinfo[instance]
2210 self._VerifyInstance(instance, inst_config, node_image)
2211 inst_nodes_offline = []
2213 pnode = inst_config.primary_node
2214 pnode_img = node_image[pnode]
2215 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2216 self.ENODERPC, pnode, "instance %s, connection to"
2217 " primary node failed", instance)
2219 if pnode_img.offline:
2220 inst_nodes_offline.append(pnode)
2222 # If the instance is non-redundant we cannot survive losing its primary
2223 # node, so we are not N+1 compliant. On the other hand we have no disk
2224 templates with more than one secondary so that situation is not well supported either.
2226 # FIXME: does not support file-backed instances
2227 if not inst_config.secondary_nodes:
2228 i_non_redundant.append(instance)
2229 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2230 instance, "instance has multiple secondary nodes: %s",
2231 utils.CommaJoin(inst_config.secondary_nodes),
2232 code=self.ETYPE_WARNING)
2234 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2235 i_non_a_balanced.append(instance)
2237 for snode in inst_config.secondary_nodes:
2238 s_img = node_image[snode]
2239 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2240 "instance %s, connection to secondary node failed", instance)
2243 inst_nodes_offline.append(snode)
2245 # warn that the instance lives on offline nodes
2246 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2247 "instance lives on offline node(s) %s",
2248 utils.CommaJoin(inst_nodes_offline))
2249 # ... or ghost nodes
2250 for node in inst_config.all_nodes:
2251 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2252 "instance lives on ghost node %s", node)
2254 feedback_fn("* Verifying orphan volumes")
2255 reserved = utils.FieldSet(*cluster.reserved_lvs)
2256 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2258 feedback_fn("* Verifying orphan instances")
2259 self._VerifyOrphanInstances(instancelist, node_image)
2261 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2262 feedback_fn("* Verifying N+1 Memory redundancy")
2263 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2265 feedback_fn("* Other Notes")
2267 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2268 % len(i_non_redundant))
2270 if i_non_a_balanced:
2271 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2272 % len(i_non_a_balanced))
2275 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2278 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2282 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2283 """Analyze the post-hooks' result
2285 This method analyses the hook result, handles it, and sends some
2286 nicely-formatted feedback back to the user.
2288 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2289 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2290 @param hooks_results: the results of the multi-node hooks rpc call
2291 @param feedback_fn: function used to send feedback back to the caller
2292 @param lu_result: previous Exec result
2293 @return: the new Exec result, based on the previous result
2297 # We only really run POST phase hooks, and are only interested in their results
2299 if phase == constants.HOOKS_PHASE_POST:
2300 # Used to change hooks' output to proper indentation
2301 indent_re = re.compile('^', re.M)
2302 feedback_fn("* Hooks Results")
2303 assert hooks_results, "invalid result from hooks"
2305 for node_name in hooks_results:
2306 res = hooks_results[node_name]
2308 test = msg and not res.offline
2309 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2310 "Communication failure in hooks execution: %s", msg)
2311 if res.offline or msg:
2312 # No need to investigate payload if node is offline or gave an error.
2313 # manually override lu_result here, as _ErrorIf only
2314 # overrides self.bad
2317 for script, hkr, output in res.payload:
2318 test = hkr == constants.HKR_FAIL
2319 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2320 "Script %s failed, output:", script)
2322 output = indent_re.sub(' ', output)
2323 feedback_fn("%s" % output)
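# Illustrative note (comments only): indent_re matches the start of every
# line (re.M), so the sub() call above prefixes each line of the hook output
# before it is echoed back, e.g. "line1\nline2" -> "  line1\n  line2" (the
# exact width is whatever string is passed to sub()).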
2329 class LUVerifyDisks(NoHooksLU):
2330 """Verifies the status of the cluster disks.
2335 def ExpandNames(self):
2336 self.needed_locks = {
2337 locking.LEVEL_NODE: locking.ALL_SET,
2338 locking.LEVEL_INSTANCE: locking.ALL_SET,
2340 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2342 def Exec(self, feedback_fn):
2343 """Verify integrity of cluster disks.
2345 @rtype: tuple of three items
2346 @return: a tuple of (dict of node-to-node_error, list of instances
2347 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2351 result = res_nodes, res_instances, res_missing = {}, [], {}
2353 vg_name = self.cfg.GetVGName()
2354 nodes = utils.NiceSort(self.cfg.GetNodeList())
2355 instances = [self.cfg.GetInstanceInfo(name)
2356 for name in self.cfg.GetInstanceList()]
2359 for inst in instances:
2361 if (not inst.admin_up or
2362 inst.disk_template not in constants.DTS_NET_MIRROR):
2364 inst.MapLVsByNode(inst_lvs)
2365 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2366 for node, vol_list in inst_lvs.iteritems():
2367 for vol in vol_list:
2368 nv_dict[(node, vol)] = inst
2373 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2377 node_res = node_lvs[node]
2378 if node_res.offline:
2380 msg = node_res.fail_msg
2382 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2383 res_nodes[node] = msg
2386 lvs = node_res.payload
2387 for lv_name, (_, _, lv_online) in lvs.items():
2388 inst = nv_dict.pop((node, lv_name), None)
2389 if (not lv_online and inst is not None
2390 and inst.name not in res_instances):
2391 res_instances.append(inst.name)
2393 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2395 for key, inst in nv_dict.iteritems():
2396 if inst.name not in res_missing:
2397 res_missing[inst.name] = []
2398 res_missing[inst.name].append(key)
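# Hedged sketch of the three containers assembled above (comments only,
# hypothetical names):
#   res_nodes     = {"node3": "rpc error text"}   # nodes that failed lv_list
#   res_instances = ["instance1"]                 # instances needing activate-disks
#   res_missing   = {"instance2": [("node1", "xenvg/disk0")]}  # missing LVs
# Together they form the (res_nodes, res_instances, res_missing) tuple that
# Exec returns, as described in the docstring above.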
2403 class LURepairDiskSizes(NoHooksLU):
2404 """Verifies the cluster disk sizes.
2407 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2410 def ExpandNames(self):
2411 if self.op.instances:
2412 self.wanted_names = []
2413 for name in self.op.instances:
2414 full_name = _ExpandInstanceName(self.cfg, name)
2415 self.wanted_names.append(full_name)
2416 self.needed_locks = {
2417 locking.LEVEL_NODE: [],
2418 locking.LEVEL_INSTANCE: self.wanted_names,
2420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2422 self.wanted_names = None
2423 self.needed_locks = {
2424 locking.LEVEL_NODE: locking.ALL_SET,
2425 locking.LEVEL_INSTANCE: locking.ALL_SET,
2427 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2429 def DeclareLocks(self, level):
2430 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2431 self._LockInstancesNodes(primary_only=True)
2433 def CheckPrereq(self):
2434 """Check prerequisites.
2436 This only checks the optional instance list against the existing names.
2439 if self.wanted_names is None:
2440 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2442 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2443 in self.wanted_names]
2445 def _EnsureChildSizes(self, disk):
2446 """Ensure children of the disk have the needed disk size.
2448 This is valid mainly for DRBD8 and fixes an issue where the
2449 children have a smaller disk size.
2451 @param disk: an L{ganeti.objects.Disk} object
2454 if disk.dev_type == constants.LD_DRBD8:
2455 assert disk.children, "Empty children for DRBD8?"
2456 fchild = disk.children[0]
2457 mismatch = fchild.size < disk.size
2459 self.LogInfo("Child disk has size %d, parent %d, fixing",
2460 fchild.size, disk.size)
2461 fchild.size = disk.size
2463 # and we recurse on this child only, not on the metadev
2464 return self._EnsureChildSizes(fchild) or mismatch
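# Worked example for _EnsureChildSizes (comments only, made-up sizes): for a
# DRBD8 disk recorded at 10240 MiB whose data child is recorded at 10200 MiB,
# the child is bumped to 10240 and the call returns True (something changed);
# the metadata child is deliberately left untouched.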
2468 def Exec(self, feedback_fn):
2469 """Verify the size of cluster disks.
2472 # TODO: check child disks too
2473 # TODO: check differences in size between primary/secondary nodes
2475 for instance in self.wanted_instances:
2476 pnode = instance.primary_node
2477 if pnode not in per_node_disks:
2478 per_node_disks[pnode] = []
2479 for idx, disk in enumerate(instance.disks):
2480 per_node_disks[pnode].append((instance, idx, disk))
2483 for node, dskl in per_node_disks.items():
2484 newl = [v[2].Copy() for v in dskl]
2486 self.cfg.SetDiskID(dsk, node)
2487 result = self.rpc.call_blockdev_getsize(node, newl)
2489 self.LogWarning("Failure in blockdev_getsize call to node"
2490 " %s, ignoring", node)
2492 if len(result.payload) != len(dskl):
2493 logging.warning("Invalid result from node %s: len(dskl)=%d,"
2494 " result.payload=%s", node, len(dskl), result.payload)
2495 self.LogWarning("Invalid result from node %s, ignoring node results",
2498 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2500 self.LogWarning("Disk %d of instance %s did not return size"
2501 " information, ignoring", idx, instance.name)
2503 if not isinstance(size, (int, long)):
2504 self.LogWarning("Disk %d of instance %s did not return valid"
2505 " size information, ignoring", idx, instance.name)
2508 if size != disk.size:
2509 self.LogInfo("Disk %d of instance %s has mismatched size,"
2510 " correcting: recorded %d, actual %d", idx,
2511 instance.name, disk.size, size)
2513 self.cfg.Update(instance, feedback_fn)
2514 changed.append((instance.name, idx, size))
2515 if self._EnsureChildSizes(disk):
2516 self.cfg.Update(instance, feedback_fn)
2517 changed.append((instance.name, idx, disk.size))
2521 class LURenameCluster(LogicalUnit):
2522 """Rename the cluster.
2525 HPATH = "cluster-rename"
2526 HTYPE = constants.HTYPE_CLUSTER
2527 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2529 def BuildHooksEnv(self):
2534 "OP_TARGET": self.cfg.GetClusterName(),
2535 "NEW_NAME": self.op.name,
2537 mn = self.cfg.GetMasterNode()
2538 all_nodes = self.cfg.GetNodeList()
2539 return env, [mn], all_nodes
2541 def CheckPrereq(self):
2542 """Verify that the passed name is a valid one.
2545 hostname = netutils.GetHostInfo(self.op.name)
2547 new_name = hostname.name
2548 self.ip = new_ip = hostname.ip
2549 old_name = self.cfg.GetClusterName()
2550 old_ip = self.cfg.GetMasterIP()
2551 if new_name == old_name and new_ip == old_ip:
2552 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2553 " cluster has changed",
2555 if new_ip != old_ip:
2556 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2557 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2558 " reachable on the network. Aborting." %
2559 new_ip, errors.ECODE_NOTUNIQUE)
2561 self.op.name = new_name
2563 def Exec(self, feedback_fn):
2564 """Rename the cluster.
2567 clustername = self.op.name
2570 # shutdown the master IP
2571 master = self.cfg.GetMasterNode()
2572 result = self.rpc.call_node_stop_master(master, False)
2573 result.Raise("Could not disable the master role")
2576 cluster = self.cfg.GetClusterInfo()
2577 cluster.cluster_name = clustername
2578 cluster.master_ip = ip
2579 self.cfg.Update(cluster, feedback_fn)
2581 # update the known hosts file
2582 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2583 node_list = self.cfg.GetNodeList()
2585 node_list.remove(master)
2588 result = self.rpc.call_upload_file(node_list,
2589 constants.SSH_KNOWN_HOSTS_FILE)
2590 for to_node, to_result in result.iteritems():
2591 msg = to_result.fail_msg
2593 msg = ("Copy of file %s to node %s failed: %s" %
2594 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2595 self.proc.LogWarning(msg)
2598 result = self.rpc.call_node_start_master(master, False, False)
2599 msg = result.fail_msg
2601 self.LogWarning("Could not re-enable the master role on"
2602 " the master, please restart manually: %s", msg)
2607 class LUSetClusterParams(LogicalUnit):
2608 """Change the parameters of the cluster.
2611 HPATH = "cluster-modify"
2612 HTYPE = constants.HTYPE_CLUSTER
2614 ("vg_name", None, _TMaybeString),
2615 ("enabled_hypervisors", None,
2616 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2617 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2618 ("beparams", None, _TOr(_TDict, _TNone)),
2619 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2620 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2621 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2622 ("uid_pool", None, _NoType),
2623 ("add_uids", None, _NoType),
2624 ("remove_uids", None, _NoType),
2625 ("maintain_node_health", None, _TMaybeBool),
2626 ("nicparams", None, _TOr(_TDict, _TNone)),
2627 ("drbd_helper", None, _TOr(_TString, _TNone)),
2628 ("default_iallocator", None, _TMaybeString),
2629 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2630 ("hidden_os", None, _TOr(_TListOf(\
2633 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2635 ("blacklisted_os", None, _TOr(_TListOf(\
2638 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2643 def CheckArguments(self):
2647 if self.op.uid_pool:
2648 uidpool.CheckUidPool(self.op.uid_pool)
2650 if self.op.add_uids:
2651 uidpool.CheckUidPool(self.op.add_uids)
2653 if self.op.remove_uids:
2654 uidpool.CheckUidPool(self.op.remove_uids)
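# Illustrative note on the _OP_PARAMS declarations above (comments only):
# each entry appears to be a (name, default, type check) triple.  Taking the
# "reserved_lvs" entry as an example, hypothetical opcode values would
# validate as:
#   None                   -> accepted (also the default)
#   ["xenvg/lv1", "other"] -> accepted (list of non-empty strings)
#   ["", "xenvg/lv1"]      -> rejected (empty string fails _TNonEmptyString)
#   "xenvg/lv1"            -> rejected (a bare string is not a list)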
2656 def ExpandNames(self):
2657 # FIXME: in the future maybe other cluster params won't require checking on
2658 # all nodes to be modified.
2659 self.needed_locks = {
2660 locking.LEVEL_NODE: locking.ALL_SET,
2662 self.share_locks[locking.LEVEL_NODE] = 1
2664 def BuildHooksEnv(self):
2669 "OP_TARGET": self.cfg.GetClusterName(),
2670 "NEW_VG_NAME": self.op.vg_name,
2672 mn = self.cfg.GetMasterNode()
2673 return env, [mn], [mn]
2675 def CheckPrereq(self):
2676 """Check prerequisites.
2678 This checks that the given parameters don't conflict and
2679 that the given volume group is valid.
2682 if self.op.vg_name is not None and not self.op.vg_name:
2683 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2684 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2685 " instances exist", errors.ECODE_INVAL)
2687 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2688 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2689 raise errors.OpPrereqError("Cannot disable drbd helper while"
2690 " drbd-based instances exist",
2693 node_list = self.acquired_locks[locking.LEVEL_NODE]
2695 # if vg_name is not None, check the given volume group on all nodes
2697 vglist = self.rpc.call_vg_list(node_list)
2698 for node in node_list:
2699 msg = vglist[node].fail_msg
2701 # ignoring down node
2702 self.LogWarning("Error while gathering data on node %s"
2703 " (ignoring node): %s", node, msg)
2705 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2707 constants.MIN_VG_SIZE)
2709 raise errors.OpPrereqError("Error on node '%s': %s" %
2710 (node, vgstatus), errors.ECODE_ENVIRON)
2712 if self.op.drbd_helper:
2713 # check the given drbd helper on all nodes
2714 helpers = self.rpc.call_drbd_helper(node_list)
2715 for node in node_list:
2716 ninfo = self.cfg.GetNodeInfo(node)
2718 self.LogInfo("Not checking drbd helper on offline node %s", node)
2720 msg = helpers[node].fail_msg
2722 raise errors.OpPrereqError("Error checking drbd helper on node"
2723 " '%s': %s" % (node, msg),
2724 errors.ECODE_ENVIRON)
2725 node_helper = helpers[node].payload
2726 if node_helper != self.op.drbd_helper:
2727 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2728 (node, node_helper), errors.ECODE_ENVIRON)
2730 self.cluster = cluster = self.cfg.GetClusterInfo()
2731 # validate params changes
2732 if self.op.beparams:
2733 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2734 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2736 if self.op.nicparams:
2737 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2738 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2739 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2742 # check all instances for consistency
2743 for instance in self.cfg.GetAllInstancesInfo().values():
2744 for nic_idx, nic in enumerate(instance.nics):
2745 params_copy = copy.deepcopy(nic.nicparams)
2746 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2748 # check parameter syntax
2750 objects.NIC.CheckParameterSyntax(params_filled)
2751 except errors.ConfigurationError, err:
2752 nic_errors.append("Instance %s, nic/%d: %s" %
2753 (instance.name, nic_idx, err))
2755 # if we're moving instances to routed, check that they have an ip
2756 target_mode = params_filled[constants.NIC_MODE]
2757 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2758 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2759 (instance.name, nic_idx))
2761 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2762 "\n".join(nic_errors))
2764 # hypervisor list/parameters
2765 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2766 if self.op.hvparams:
2767 for hv_name, hv_dict in self.op.hvparams.items():
2768 if hv_name not in self.new_hvparams:
2769 self.new_hvparams[hv_name] = hv_dict
2771 self.new_hvparams[hv_name].update(hv_dict)
2773 # os hypervisor parameters
2774 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2776 for os_name, hvs in self.op.os_hvp.items():
2777 if os_name not in self.new_os_hvp:
2778 self.new_os_hvp[os_name] = hvs
2780 for hv_name, hv_dict in hvs.items():
2781 if hv_name not in self.new_os_hvp[os_name]:
2782 self.new_os_hvp[os_name][hv_name] = hv_dict
2784 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2787 self.new_osp = objects.FillDict(cluster.osparams, {})
2788 if self.op.osparams:
2789 for os_name, osp in self.op.osparams.items():
2790 if os_name not in self.new_osp:
2791 self.new_osp[os_name] = {}
2793 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2796 if not self.new_osp[os_name]:
2797 # we removed all parameters
2798 del self.new_osp[os_name]
2800 # check the parameter validity (remote check)
2801 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2802 os_name, self.new_osp[os_name])
2804 # changes to the hypervisor list
2805 if self.op.enabled_hypervisors is not None:
2806 self.hv_list = self.op.enabled_hypervisors
2807 for hv in self.hv_list:
2808 # if the hypervisor doesn't already exist in the cluster
2809 # hvparams, we initialize it to empty, and then (in both
2810 # cases) we make sure to fill the defaults, as we might not
2811 # have a complete defaults list if the hypervisor wasn't enabled before
2813 if hv not in new_hvp:
2815 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2816 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2818 self.hv_list = cluster.enabled_hypervisors
2820 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2821 # either the enabled list has changed, or the parameters have, validate
2822 for hv_name, hv_params in self.new_hvparams.items():
2823 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2824 (self.op.enabled_hypervisors and
2825 hv_name in self.op.enabled_hypervisors)):
2826 # either this is a new hypervisor, or its parameters have changed
2827 hv_class = hypervisor.GetHypervisor(hv_name)
2828 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2829 hv_class.CheckParameterSyntax(hv_params)
2830 _CheckHVParams(self, node_list, hv_name, hv_params)
2833 # no need to check any newly-enabled hypervisors, since the
2834 # defaults have already been checked in the above code-block
2835 for os_name, os_hvp in self.new_os_hvp.items():
2836 for hv_name, hv_params in os_hvp.items():
2837 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2838 # we need to fill in the new os_hvp on top of the actual hv_p
2839 cluster_defaults = self.new_hvparams.get(hv_name, {})
2840 new_osp = objects.FillDict(cluster_defaults, hv_params)
2841 hv_class = hypervisor.GetHypervisor(hv_name)
2842 hv_class.CheckParameterSyntax(new_osp)
2843 _CheckHVParams(self, node_list, hv_name, new_osp)
2845 if self.op.default_iallocator:
2846 alloc_script = utils.FindFile(self.op.default_iallocator,
2847 constants.IALLOCATOR_SEARCH_PATH,
2849 if alloc_script is None:
2850 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2851 " specified" % self.op.default_iallocator,
2854 def Exec(self, feedback_fn):
2855 """Change the parameters of the cluster.
2858 if self.op.vg_name is not None:
2859 new_volume = self.op.vg_name
2862 if new_volume != self.cfg.GetVGName():
2863 self.cfg.SetVGName(new_volume)
2865 feedback_fn("Cluster LVM configuration already in desired"
2866 " state, not changing")
2867 if self.op.drbd_helper is not None:
2868 new_helper = self.op.drbd_helper
2871 if new_helper != self.cfg.GetDRBDHelper():
2872 self.cfg.SetDRBDHelper(new_helper)
2874 feedback_fn("Cluster DRBD helper already in desired state,"
2876 if self.op.hvparams:
2877 self.cluster.hvparams = self.new_hvparams
2879 self.cluster.os_hvp = self.new_os_hvp
2880 if self.op.enabled_hypervisors is not None:
2881 self.cluster.hvparams = self.new_hvparams
2882 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2883 if self.op.beparams:
2884 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2885 if self.op.nicparams:
2886 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2887 if self.op.osparams:
2888 self.cluster.osparams = self.new_osp
2890 if self.op.candidate_pool_size is not None:
2891 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2892 # we need to update the pool size here, otherwise the save will fail
2893 _AdjustCandidatePool(self, [])
2895 if self.op.maintain_node_health is not None:
2896 self.cluster.maintain_node_health = self.op.maintain_node_health
2898 if self.op.add_uids is not None:
2899 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2901 if self.op.remove_uids is not None:
2902 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2904 if self.op.uid_pool is not None:
2905 self.cluster.uid_pool = self.op.uid_pool
2907 if self.op.default_iallocator is not None:
2908 self.cluster.default_iallocator = self.op.default_iallocator
2910 if self.op.reserved_lvs is not None:
2911 self.cluster.reserved_lvs = self.op.reserved_lvs
2913 def helper_os(aname, mods, desc):
2915 lst = getattr(self.cluster, aname)
2916 for key, val in mods:
2917 if key == constants.DDM_ADD:
2919 feedback_fn("OS %s already in %s, ignoring", val, desc)
2922 elif key == constants.DDM_REMOVE:
2926 feedback_fn("OS %s not found in %s, ignoring", val, desc)
2928 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2930 if self.op.hidden_os:
2931 helper_os("hidden_os", self.op.hidden_os, "hidden")
2933 if self.op.blacklisted_os:
2934 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2936 self.cfg.Update(self.cluster, feedback_fn)
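# Sketch of the hidden_os/blacklisted_os handling above (comments only,
# hypothetical values): the opcode carries (modification, OS name) pairs, e.g.
#   self.op.hidden_os = [(constants.DDM_ADD, "debian-example"),
#                        (constants.DDM_REMOVE, "lenny-image")]
# helper_os() then appends to or removes from the corresponding cluster list,
# only warning (not failing) when an OS is already present or not found.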
2939 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2940 """Distribute additional files which are part of the cluster configuration.
2942 ConfigWriter takes care of distributing the config and ssconf files, but
2943 there are more files which should be distributed to all nodes. This function
2944 makes sure those are copied.
2946 @param lu: calling logical unit
2947 @param additional_nodes: list of nodes not in the config to distribute to
2950 # 1. Gather target nodes
2951 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2952 dist_nodes = lu.cfg.GetOnlineNodeList()
2953 if additional_nodes is not None:
2954 dist_nodes.extend(additional_nodes)
2955 if myself.name in dist_nodes:
2956 dist_nodes.remove(myself.name)
2958 # 2. Gather files to distribute
2959 dist_files = set([constants.ETC_HOSTS,
2960 constants.SSH_KNOWN_HOSTS_FILE,
2961 constants.RAPI_CERT_FILE,
2962 constants.RAPI_USERS_FILE,
2963 constants.CONFD_HMAC_KEY,
2964 constants.CLUSTER_DOMAIN_SECRET_FILE,
2967 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2968 for hv_name in enabled_hypervisors:
2969 hv_class = hypervisor.GetHypervisor(hv_name)
2970 dist_files.update(hv_class.GetAncillaryFiles())
2972 # 3. Perform the files upload
2973 for fname in dist_files:
2974 if os.path.exists(fname):
2975 result = lu.rpc.call_upload_file(dist_nodes, fname)
2976 for to_node, to_result in result.items():
2977 msg = to_result.fail_msg
2979 msg = ("Copy of file %s to node %s failed: %s" %
2980 (fname, to_node, msg))
2981 lu.proc.LogWarning(msg)
2984 class LURedistributeConfig(NoHooksLU):
2985 """Force the redistribution of cluster configuration.
2987 This is a very simple LU.
2992 def ExpandNames(self):
2993 self.needed_locks = {
2994 locking.LEVEL_NODE: locking.ALL_SET,
2996 self.share_locks[locking.LEVEL_NODE] = 1
2998 def Exec(self, feedback_fn):
2999 """Redistribute the configuration.
3002 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3003 _RedistributeAncillaryFiles(self)
3006 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3007 """Sleep and poll for an instance's disks to sync.
3010 if not instance.disks or disks is not None and not disks:
3013 disks = _ExpandCheckDisks(instance, disks)
3016 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3018 node = instance.primary_node
3021 lu.cfg.SetDiskID(dev, node)
3023 # TODO: Convert to utils.Retry
3026 degr_retries = 10 # in seconds, as we sleep 1 second each time
3030 cumul_degraded = False
3031 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3032 msg = rstats.fail_msg
3034 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3037 raise errors.RemoteError("Can't contact node %s for mirror data,"
3038 " aborting." % node)
3041 rstats = rstats.payload
3043 for i, mstat in enumerate(rstats):
3045 lu.LogWarning("Can't compute data for node %s/%s",
3046 node, disks[i].iv_name)
3049 cumul_degraded = (cumul_degraded or
3050 (mstat.is_degraded and mstat.sync_percent is None))
3051 if mstat.sync_percent is not None:
3053 if mstat.estimated_time is not None:
3054 rem_time = ("%s remaining (estimated)" %
3055 utils.FormatSeconds(mstat.estimated_time))
3056 max_time = mstat.estimated_time
3058 rem_time = "no time estimate"
3059 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3060 (disks[i].iv_name, mstat.sync_percent, rem_time))
3062 # if we're done but degraded, let's do a few small retries, to
3063 # make sure we see a stable and not transient situation; therefore
3064 # we force restart of the loop
3065 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3066 logging.info("Degraded disks found, %d retries left", degr_retries)
3074 time.sleep(min(60, max_time))
3077 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3078 return not cumul_degraded
3081 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3082 """Check that mirrors are not degraded.
3084 The ldisk parameter, if True, will change the test from the
3085 is_degraded attribute (which represents overall non-ok status for
3086 the device(s)) to the ldisk (representing the local storage status).
3089 lu.cfg.SetDiskID(dev, node)
3093 if on_primary or dev.AssembleOnSecondary():
3094 rstats = lu.rpc.call_blockdev_find(node, dev)
3095 msg = rstats.fail_msg
3097 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3099 elif not rstats.payload:
3100 lu.LogWarning("Can't find disk on node %s", node)
3104 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3106 result = result and not rstats.payload.is_degraded
3109 for child in dev.children:
3110 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
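# Usage sketch for _CheckDiskConsistency (comments only): a caller moving an
# instance would typically check the same device twice, e.g.
#   _CheckDiskConsistency(lu, dev, node, False)              # overall health
#   _CheckDiskConsistency(lu, dev, node, False, ldisk=True)  # local storage only
# With ldisk=True only payload.ldisk_status == constants.LDS_OKAY counts as
# consistent; otherwise the is_degraded flag decides.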
3115 class LUDiagnoseOS(NoHooksLU):
3116 """Logical unit for OS diagnose/query.
3121 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3125 _BLK = "blacklisted"
3127 _FIELDS_STATIC = utils.FieldSet()
3128 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3129 "parameters", "api_versions", _HID, _BLK)
3131 def CheckArguments(self):
3133 raise errors.OpPrereqError("Selective OS query not supported",
3136 _CheckOutputFields(static=self._FIELDS_STATIC,
3137 dynamic=self._FIELDS_DYNAMIC,
3138 selected=self.op.output_fields)
3140 def ExpandNames(self):
3141 # Lock all nodes, in shared mode
3142 # Temporary removal of locks, should be reverted later
3143 # TODO: reintroduce locks when they are lighter-weight
3144 self.needed_locks = {}
3145 #self.share_locks[locking.LEVEL_NODE] = 1
3146 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3149 def _DiagnoseByOS(rlist):
3150 """Remaps a per-node return list into a per-os per-node dictionary
3152 @param rlist: a map with node names as keys and OS objects as values
3155 @return: a dictionary with osnames as keys and as value another
3156 map, with nodes as keys and tuples of (path, status, diagnose,
3157 variants, parameters, api_versions) as values, eg::
3159 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
3160 (/srv/..., False, "invalid api")],
3161 "node2": [(/srv/..., True, "", [], [], [])]}
3166 # we build here the list of nodes that didn't fail the RPC (at RPC
3167 # level), so that nodes with a non-responding node daemon don't
3168 # make all OSes invalid
3169 good_nodes = [node_name for node_name in rlist
3170 if not rlist[node_name].fail_msg]
3171 for node_name, nr in rlist.items():
3172 if nr.fail_msg or not nr.payload:
3174 for (name, path, status, diagnose, variants,
3175 params, api_versions) in nr.payload:
3176 if name not in all_os:
3177 # build a list of nodes for this os containing empty lists
3178 # for each node in node_list
3180 for nname in good_nodes:
3181 all_os[name][nname] = []
3182 # convert params from [name, help] to (name, help)
3183 params = [tuple(v) for v in params]
3184 all_os[name][node_name].append((path, status, diagnose,
3185 variants, params, api_versions))
3188 def Exec(self, feedback_fn):
3189 """Compute the list of OSes.
3192 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3193 node_data = self.rpc.call_os_diagnose(valid_nodes)
3194 pol = self._DiagnoseByOS(node_data)
3196 cluster = self.cfg.GetClusterInfo()
3198 for os_name in utils.NiceSort(pol.keys()):
3199 os_data = pol[os_name]
3202 (variants, params, api_versions) = null_state = (set(), set(), set())
3203 for idx, osl in enumerate(os_data.values()):
3204 valid = bool(valid and osl and osl[0][1])
3206 (variants, params, api_versions) = null_state
3208 node_variants, node_params, node_api = osl[0][3:6]
3209 if idx == 0: # first entry
3210 variants = set(node_variants)
3211 params = set(node_params)
3212 api_versions = set(node_api)
3213 else: # keep consistency
3214 variants.intersection_update(node_variants)
3215 params.intersection_update(node_params)
3216 api_versions.intersection_update(node_api)
3218 is_hid = os_name in cluster.hidden_os
3219 is_blk = os_name in cluster.blacklisted_os
3220 if ((self._HID not in self.op.output_fields and is_hid) or
3221 (self._BLK not in self.op.output_fields and is_blk) or
3222 (self._VLD not in self.op.output_fields and not valid)):
3225 for field in self.op.output_fields:
3228 elif field == self._VLD:
3230 elif field == "node_status":
3231 # this is just a copy of the dict
3233 for node_name, nos_list in os_data.items():
3234 val[node_name] = nos_list
3235 elif field == "variants":
3236 val = utils.NiceSort(list(variants))
3237 elif field == "parameters":
3239 elif field == "api_versions":
3240 val = list(api_versions)
3241 elif field == self._HID:
3243 elif field == self._BLK:
3246 raise errors.ParameterError(field)
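# Hedged example of the rows built above (comments only, made-up data): with
# output_fields == ["name", "valid", "variants"] a single output row could be
#   ["debian-example", True, ["default", "minimal"]]
# Hidden, blacklisted or invalid OSes are skipped entirely unless the
# corresponding field was explicitly requested.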
3253 class LURemoveNode(LogicalUnit):
3254 """Logical unit for removing a node.
3257 HPATH = "node-remove"
3258 HTYPE = constants.HTYPE_NODE
3263 def BuildHooksEnv(self):
3266 This doesn't run on the target node in the pre phase as a failed
3267 node would then be impossible to remove.
3271 "OP_TARGET": self.op.node_name,
3272 "NODE_NAME": self.op.node_name,
3274 all_nodes = self.cfg.GetNodeList()
3276 all_nodes.remove(self.op.node_name)
3278 logging.warning("Node %s which is about to be removed not found"
3279 " in the all nodes list", self.op.node_name)
3280 return env, all_nodes, all_nodes
3282 def CheckPrereq(self):
3283 """Check prerequisites.
3286 - the node exists in the configuration
3287 - it does not have primary or secondary instances
3288 - it's not the master
3290 Any errors are signaled by raising errors.OpPrereqError.
3293 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3294 node = self.cfg.GetNodeInfo(self.op.node_name)
3295 assert node is not None
3297 instance_list = self.cfg.GetInstanceList()
3299 masternode = self.cfg.GetMasterNode()
3300 if node.name == masternode:
3301 raise errors.OpPrereqError("Node is the master node,"
3302 " you need to failover first.",
3305 for instance_name in instance_list:
3306 instance = self.cfg.GetInstanceInfo(instance_name)
3307 if node.name in instance.all_nodes:
3308 raise errors.OpPrereqError("Instance %s is still running on the node,"
3309 " please remove first." % instance_name,
3311 self.op.node_name = node.name
3314 def Exec(self, feedback_fn):
3315 """Removes the node from the cluster.
3319 logging.info("Stopping the node daemon and removing configs from node %s",
3322 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3324 # Promote nodes to master candidate as needed
3325 _AdjustCandidatePool(self, exceptions=[node.name])
3326 self.context.RemoveNode(node.name)
3328 # Run post hooks on the node before it's removed
3329 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3331 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3333 # pylint: disable-msg=W0702
3334 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3336 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3337 msg = result.fail_msg
3339 self.LogWarning("Errors encountered on the remote node while leaving"
3340 " the cluster: %s", msg)
3342 # Remove node from our /etc/hosts
3343 if self.cfg.GetClusterInfo().modify_etc_hosts:
3344 # FIXME: this should be done via an rpc call to node daemon
3345 utils.RemoveHostFromEtcHosts(node.name)
3346 _RedistributeAncillaryFiles(self)
3349 class LUQueryNodes(NoHooksLU):
3350 """Logical unit for querying nodes.
3353 # pylint: disable-msg=W0142
3356 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3357 ("use_locking", False, _TBool),
3361 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3362 "master_candidate", "offline", "drained"]
3364 _FIELDS_DYNAMIC = utils.FieldSet(
3366 "mtotal", "mnode", "mfree",
3368 "ctotal", "cnodes", "csockets",
3371 _FIELDS_STATIC = utils.FieldSet(*[
3372 "pinst_cnt", "sinst_cnt",
3373 "pinst_list", "sinst_list",
3374 "pip", "sip", "tags",
3376 "role"] + _SIMPLE_FIELDS
3379 def CheckArguments(self):
3380 _CheckOutputFields(static=self._FIELDS_STATIC,
3381 dynamic=self._FIELDS_DYNAMIC,
3382 selected=self.op.output_fields)
3384 def ExpandNames(self):
3385 self.needed_locks = {}
3386 self.share_locks[locking.LEVEL_NODE] = 1
3389 self.wanted = _GetWantedNodes(self, self.op.names)
3391 self.wanted = locking.ALL_SET
3393 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3394 self.do_locking = self.do_node_query and self.op.use_locking
3396 # if we don't request only static fields, we need to lock the nodes
3397 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3399 def Exec(self, feedback_fn):
3400 """Computes the list of nodes and their attributes.
3403 all_info = self.cfg.GetAllNodesInfo()
3405 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3406 elif self.wanted != locking.ALL_SET:
3407 nodenames = self.wanted
3408 missing = set(nodenames).difference(all_info.keys())
3410 raise errors.OpExecError(
3411 "Some nodes were removed before retrieving their data: %s" % missing)
3413 nodenames = all_info.keys()
3415 nodenames = utils.NiceSort(nodenames)
3416 nodelist = [all_info[name] for name in nodenames]
3418 # begin data gathering
3420 if self.do_node_query:
3422 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3423 self.cfg.GetHypervisorType())
3424 for name in nodenames:
3425 nodeinfo = node_data[name]
3426 if not nodeinfo.fail_msg and nodeinfo.payload:
3427 nodeinfo = nodeinfo.payload
3428 fn = utils.TryConvert
3430 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3431 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3432 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3433 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3434 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3435 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3436 "bootid": nodeinfo.get('bootid', None),
3437 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3438 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3441 live_data[name] = {}
3443 live_data = dict.fromkeys(nodenames, {})
3445 node_to_primary = dict([(name, set()) for name in nodenames])
3446 node_to_secondary = dict([(name, set()) for name in nodenames])
3448 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3449 "sinst_cnt", "sinst_list"))
3450 if inst_fields & frozenset(self.op.output_fields):
3451 inst_data = self.cfg.GetAllInstancesInfo()
3453 for inst in inst_data.values():
3454 if inst.primary_node in node_to_primary:
3455 node_to_primary[inst.primary_node].add(inst.name)
3456 for secnode in inst.secondary_nodes:
3457 if secnode in node_to_secondary:
3458 node_to_secondary[secnode].add(inst.name)
3460 master_node = self.cfg.GetMasterNode()
3462 # end data gathering
3465 for node in nodelist:
3467 for field in self.op.output_fields:
3468 if field in self._SIMPLE_FIELDS:
3469 val = getattr(node, field)
3470 elif field == "pinst_list":
3471 val = list(node_to_primary[node.name])
3472 elif field == "sinst_list":
3473 val = list(node_to_secondary[node.name])
3474 elif field == "pinst_cnt":
3475 val = len(node_to_primary[node.name])
3476 elif field == "sinst_cnt":
3477 val = len(node_to_secondary[node.name])
3478 elif field == "pip":
3479 val = node.primary_ip
3480 elif field == "sip":
3481 val = node.secondary_ip
3482 elif field == "tags":
3483 val = list(node.GetTags())
3484 elif field == "master":
3485 val = node.name == master_node
3486 elif self._FIELDS_DYNAMIC.Matches(field):
3487 val = live_data[node.name].get(field, None)
3488 elif field == "role":
3489 if node.name == master_node:
3491 elif node.master_candidate:
3500 raise errors.ParameterError(field)
3501 node_output.append(val)
3502 output.append(node_output)
3507 class LUQueryNodeVolumes(NoHooksLU):
3508 """Logical unit for getting volumes on node(s).
3512 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3513 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3516 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3517 _FIELDS_STATIC = utils.FieldSet("node")
3519 def CheckArguments(self):
3520 _CheckOutputFields(static=self._FIELDS_STATIC,
3521 dynamic=self._FIELDS_DYNAMIC,
3522 selected=self.op.output_fields)
3524 def ExpandNames(self):
3525 self.needed_locks = {}
3526 self.share_locks[locking.LEVEL_NODE] = 1
3527 if not self.op.nodes:
3528 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3530 self.needed_locks[locking.LEVEL_NODE] = \
3531 _GetWantedNodes(self, self.op.nodes)
3533 def Exec(self, feedback_fn):
3534 """Computes the list of volumes and their attributes.
3537 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3538 volumes = self.rpc.call_node_volumes(nodenames)
3540 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3541 in self.cfg.GetInstanceList()]
3543 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3546 for node in nodenames:
3547 nresult = volumes[node]
3550 msg = nresult.fail_msg
3552 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3555 node_vols = nresult.payload[:]
3556 node_vols.sort(key=lambda vol: vol['dev'])
3558 for vol in node_vols:
3560 for field in self.op.output_fields:
3563 elif field == "phys":
3567 elif field == "name":
3569 elif field == "size":
3570 val = int(float(vol['size']))
3571 elif field == "instance":
3573 if node not in lv_by_node[inst]:
3575 if vol['name'] in lv_by_node[inst][node]:
3581 raise errors.ParameterError(field)
3582 node_output.append(str(val))
3584 output.append(node_output)
3589 class LUQueryNodeStorage(NoHooksLU):
3590 """Logical unit for getting information on storage units on node(s).
3593 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3595 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3596 ("storage_type", _NoDefault, _CheckStorageType),
3597 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3598 ("name", None, _TMaybeString),
3602 def CheckArguments(self):
3603 _CheckOutputFields(static=self._FIELDS_STATIC,
3604 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3605 selected=self.op.output_fields)
3607 def ExpandNames(self):
3608 self.needed_locks = {}
3609 self.share_locks[locking.LEVEL_NODE] = 1
3612 self.needed_locks[locking.LEVEL_NODE] = \
3613 _GetWantedNodes(self, self.op.nodes)
3615 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3617 def Exec(self, feedback_fn):
3618 """Computes the list of storage units and their attributes.
3621 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3623 # Always get name to sort by
3624 if constants.SF_NAME in self.op.output_fields:
3625 fields = self.op.output_fields[:]
3627 fields = [constants.SF_NAME] + self.op.output_fields
3629 # Never ask for node or type as it's only known to the LU
3630 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3631 while extra in fields:
3632 fields.remove(extra)
3634 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3635 name_idx = field_idx[constants.SF_NAME]
3637 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3638 data = self.rpc.call_storage_list(self.nodes,
3639 self.op.storage_type, st_args,
3640 self.op.name, fields)
3644 for node in utils.NiceSort(self.nodes):
3645 nresult = data[node]
3649 msg = nresult.fail_msg
3651 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3654 rows = dict([(row[name_idx], row) for row in nresult.payload])
3656 for name in utils.NiceSort(rows.keys()):
3661 for field in self.op.output_fields:
3662 if field == constants.SF_NODE:
3664 elif field == constants.SF_TYPE:
3665 val = self.op.storage_type
3666 elif field in field_idx:
3667 val = row[field_idx[field]]
3669 raise errors.ParameterError(field)
3678 class LUModifyNodeStorage(NoHooksLU):
3679 """Logical unit for modifying a storage volume on a node.
3684 ("storage_type", _NoDefault, _CheckStorageType),
3685 ("name", _NoDefault, _TNonEmptyString),
3686 ("changes", _NoDefault, _TDict),
3690 def CheckArguments(self):
3691 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3693 storage_type = self.op.storage_type
3696 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3698 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3699 " modified" % storage_type,
3702 diff = set(self.op.changes.keys()) - modifiable
3704 raise errors.OpPrereqError("The following fields can not be modified for"
3705 " storage units of type '%s': %r" %
3706 (storage_type, list(diff)),
3709 def ExpandNames(self):
3710 self.needed_locks = {
3711 locking.LEVEL_NODE: self.op.node_name,
3714 def Exec(self, feedback_fn):
3715 """Modifies the requested storage unit on the node.
3718 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3719 result = self.rpc.call_storage_modify(self.op.node_name,
3720 self.op.storage_type, st_args,
3721 self.op.name, self.op.changes)
3722 result.Raise("Failed to modify storage unit '%s' on %s" %
3723 (self.op.name, self.op.node_name))
3726 class LUAddNode(LogicalUnit):
3727 """Logical unit for adding a node to the cluster.
3731 HTYPE = constants.HTYPE_NODE
3734 ("primary_ip", None, _NoType),
3735 ("secondary_ip", None, _TMaybeString),
3736 ("readd", False, _TBool),
3739 def CheckArguments(self):
3740 # validate/normalize the node name
3741 self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3743 def BuildHooksEnv(self):
3746 This will run on all nodes before, and on all nodes + the new node after.
3750 "OP_TARGET": self.op.node_name,
3751 "NODE_NAME": self.op.node_name,
3752 "NODE_PIP": self.op.primary_ip,
3753 "NODE_SIP": self.op.secondary_ip,
3755 nodes_0 = self.cfg.GetNodeList()
3756 nodes_1 = nodes_0 + [self.op.node_name, ]
3757 return env, nodes_0, nodes_1
3759 def CheckPrereq(self):
3760 """Check prerequisites.
3763 - the new node is not already in the config
3765 - its parameters (single/dual homed) match the cluster
3767 Any errors are signaled by raising errors.OpPrereqError.
3770 node_name = self.op.node_name
3773 dns_data = netutils.GetHostInfo(node_name)
3775 node = dns_data.name
3776 primary_ip = self.op.primary_ip = dns_data.ip
3777 if self.op.secondary_ip is None:
3778 self.op.secondary_ip = primary_ip
3779 if not netutils.IsValidIP4(self.op.secondary_ip):
3780 raise errors.OpPrereqError("Invalid secondary IP given",
3782 secondary_ip = self.op.secondary_ip
3784 node_list = cfg.GetNodeList()
3785 if not self.op.readd and node in node_list:
3786 raise errors.OpPrereqError("Node %s is already in the configuration" %
3787 node, errors.ECODE_EXISTS)
3788 elif self.op.readd and node not in node_list:
3789 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3792 self.changed_primary_ip = False
3794 for existing_node_name in node_list:
3795 existing_node = cfg.GetNodeInfo(existing_node_name)
3797 if self.op.readd and node == existing_node_name:
3798 if existing_node.secondary_ip != secondary_ip:
3799 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3800 " address configuration as before",
3802 if existing_node.primary_ip != primary_ip:
3803 self.changed_primary_ip = True
3807 if (existing_node.primary_ip == primary_ip or
3808 existing_node.secondary_ip == primary_ip or
3809 existing_node.primary_ip == secondary_ip or
3810 existing_node.secondary_ip == secondary_ip):
3811 raise errors.OpPrereqError("New node ip address(es) conflict with"
3812 " existing node %s" % existing_node.name,
3813 errors.ECODE_NOTUNIQUE)
3815 # check that the type of the node (single versus dual homed) is the
3816 # same as for the master
3817 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3818 master_singlehomed = myself.secondary_ip == myself.primary_ip
3819 newbie_singlehomed = secondary_ip == primary_ip
3820 if master_singlehomed != newbie_singlehomed:
3821 if master_singlehomed:
3822 raise errors.OpPrereqError("The master has no private ip but the"
3823 " new node has one",
3826 raise errors.OpPrereqError("The master has a private ip but the"
3827 " new node doesn't have one",
3830 # checks reachability
3831 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3832 raise errors.OpPrereqError("Node not reachable by ping",
3833 errors.ECODE_ENVIRON)
3835 if not newbie_singlehomed:
3836 # check reachability from my secondary ip to newbie's secondary ip
3837 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3838 source=myself.secondary_ip):
3839 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3840 " based ping to noded port",
3841 errors.ECODE_ENVIRON)
3848 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3851 self.new_node = self.cfg.GetNodeInfo(node)
3852 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3854 self.new_node = objects.Node(name=node,
3855 primary_ip=primary_ip,
3856 secondary_ip=secondary_ip,
3857 master_candidate=self.master_candidate,
3858 offline=False, drained=False)
3860 def Exec(self, feedback_fn):
3861 """Adds the new node to the cluster.
3864 new_node = self.new_node
3865 node = new_node.name
3867 # for re-adds, reset the offline/drained/master-candidate flags;
3868 # we need to reset here, otherwise offline would prevent RPC calls
3869 # later in the procedure; this also means that if the re-add
3870 # fails, we are left with a non-offlined, broken node
3872 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3873 self.LogInfo("Readding a node, the offline/drained flags were reset")
3874 # if we demote the node, we do cleanup later in the procedure
3875 new_node.master_candidate = self.master_candidate
3876 if self.changed_primary_ip:
3877 new_node.primary_ip = self.op.primary_ip
3879 # notify the user about any possible mc promotion
3880 if new_node.master_candidate:
3881 self.LogInfo("Node will be a master candidate")
3883 # check connectivity
3884 result = self.rpc.call_version([node])[node]
3885 result.Raise("Can't get version information from node %s" % node)
3886 if constants.PROTOCOL_VERSION == result.payload:
3887 logging.info("Communication to node %s fine, sw version %s match",
3888 node, result.payload)
3890 raise errors.OpExecError("Version mismatch master version %s,"
3891 " node version %s" %
3892 (constants.PROTOCOL_VERSION, result.payload))
3895 if self.cfg.GetClusterInfo().modify_ssh_setup:
3896 logging.info("Copy ssh key to node %s", node)
3897 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3899 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3900 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3904 keyarray.append(utils.ReadFile(i))
3906 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3907 keyarray[2], keyarray[3], keyarray[4],
3909 result.Raise("Cannot transfer ssh keys to the new node")
3911 # Add node to our /etc/hosts, and add key to known_hosts
3912 if self.cfg.GetClusterInfo().modify_etc_hosts:
3913 # FIXME: this should be done via an rpc call to node daemon
3914 utils.AddHostToEtcHosts(new_node.name)
3916 if new_node.secondary_ip != new_node.primary_ip:
3917 result = self.rpc.call_node_has_ip_address(new_node.name,
3918 new_node.secondary_ip)
3919 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3920 prereq=True, ecode=errors.ECODE_ENVIRON)
3921 if not result.payload:
3922 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3923 " you gave (%s). Please fix and re-run this"
3924 " command." % new_node.secondary_ip)
3926 node_verify_list = [self.cfg.GetMasterNode()]
3927 node_verify_param = {
3928 constants.NV_NODELIST: [node],
3929 # TODO: do a node-net-test as well?
3932 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3933 self.cfg.GetClusterName())
3934 for verifier in node_verify_list:
3935 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3936 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3938 for failed in nl_payload:
3939 feedback_fn("ssh/hostname verification failed"
3940 " (checking from %s): %s" %
3941 (verifier, nl_payload[failed]))
3942 raise errors.OpExecError("ssh/hostname verification failed.")
3945 _RedistributeAncillaryFiles(self)
3946 self.context.ReaddNode(new_node)
3947 # make sure we redistribute the config
3948 self.cfg.Update(new_node, feedback_fn)
3949 # and make sure the new node will not have old files around
3950 if not new_node.master_candidate:
3951 result = self.rpc.call_node_demote_from_mc(new_node.name)
3952 msg = result.fail_msg
3954 self.LogWarning("Node failed to demote itself from master"
3955 " candidate status: %s" % msg)
3957 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3958 self.context.AddNode(new_node, self.proc.GetECId())
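# Illustrative sketch (hypothetical helper, not used by this module): the
# version handshake in LUAddNode.Exec above accepts only an exact protocol
# match; the same check in isolation:
def _ExampleProtocolVersionsMatch(local_version, remote_version):
  """Return True only on an exact protocol version match.

  The arguments play the roles of constants.PROTOCOL_VERSION and the
  payload of the remote "version" RPC call, respectively.

  """
  return local_version == remote_version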
3961 class LUSetNodeParams(LogicalUnit):
3962 """Modifies the parameters of a node.
3965 HPATH = "node-modify"
3966 HTYPE = constants.HTYPE_NODE
3969 ("master_candidate", None, _TMaybeBool),
3970 ("offline", None, _TMaybeBool),
3971 ("drained", None, _TMaybeBool),
3972 ("auto_promote", False, _TBool),
3977 def CheckArguments(self):
3978 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3979 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3980 if all_mods.count(None) == 3:
3981 raise errors.OpPrereqError("Please pass at least one modification",
3983 if all_mods.count(True) > 1:
3984 raise errors.OpPrereqError("Can't set the node into more than one"
3985 " state at the same time",
3988 # Boolean value that tells us whether we're offlining or draining the node
3989 self.offline_or_drain = (self.op.offline == True or
3990 self.op.drained == True)
3991 self.deoffline_or_drain = (self.op.offline == False or
3992 self.op.drained == False)
3993 self.might_demote = (self.op.master_candidate == False or
3994 self.offline_or_drain)
3996 self.lock_all = self.op.auto_promote and self.might_demote
3999 def ExpandNames(self):
4001 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4003 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4005 def BuildHooksEnv(self):
4008 This runs on the master node.
4012 "OP_TARGET": self.op.node_name,
4013 "MASTER_CANDIDATE": str(self.op.master_candidate),
4014 "OFFLINE": str(self.op.offline),
4015 "DRAINED": str(self.op.drained),
4017 nl = [self.cfg.GetMasterNode(),
4021 def CheckPrereq(self):
4022 """Check prerequisites.
4024 This checks the requested flag changes against the node's current state.
4027 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4029 if (self.op.master_candidate is not None or
4030 self.op.drained is not None or
4031 self.op.offline is not None):
4032 # we can't change the master's node flags
4033 if self.op.node_name == self.cfg.GetMasterNode():
4034 raise errors.OpPrereqError("The master role can be changed"
4035 " only via master-failover",
4039 if node.master_candidate and self.might_demote and not self.lock_all:
4040 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4041 # check whether removing this node would leave us short of master candidates
4043 (mc_remaining, mc_should, _) = \
4044 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4045 if mc_remaining < mc_should:
4046 raise errors.OpPrereqError("Not enough master candidates, please"
4047 " pass auto_promote to allow promotion",
4050 if (self.op.master_candidate == True and
4051 ((node.offline and not self.op.offline == False) or
4052 (node.drained and not self.op.drained == False))):
4053 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4054 " to master_candidate" % node.name,
4057 # If we're being deofflined/drained, we'll MC ourself if needed
4058 if (self.deoffline_or_drain and not self.offline_or_drain and not
4059 self.op.master_candidate == True and not node.master_candidate):
4060 self.op.master_candidate = _DecideSelfPromotion(self)
4061 if self.op.master_candidate:
4062 self.LogInfo("Autopromoting node to master candidate")
4066 def Exec(self, feedback_fn):
4075 if self.op.offline is not None:
4076 node.offline = self.op.offline
4077 result.append(("offline", str(self.op.offline)))
4078 if self.op.offline == True:
4079 if node.master_candidate:
4080 node.master_candidate = False
4082 result.append(("master_candidate", "auto-demotion due to offline"))
4084 node.drained = False
4085 result.append(("drained", "clear drained status due to offline"))
4087 if self.op.master_candidate is not None:
4088 node.master_candidate = self.op.master_candidate
4090 result.append(("master_candidate", str(self.op.master_candidate)))
4091 if self.op.master_candidate == False:
4092 rrc = self.rpc.call_node_demote_from_mc(node.name)
4095 self.LogWarning("Node failed to demote itself: %s" % msg)
4097 if self.op.drained is not None:
4098 node.drained = self.op.drained
4099 result.append(("drained", str(self.op.drained)))
4100 if self.op.drained == True:
4101 if node.master_candidate:
4102 node.master_candidate = False
4104 result.append(("master_candidate", "auto-demotion due to drain"))
4105 rrc = self.rpc.call_node_demote_from_mc(node.name)
4108 self.LogWarning("Node failed to demote itself: %s" % msg)
4110 node.offline = False
4111 result.append(("offline", "clear offline status due to drain"))
4113 # we locked all nodes, we adjust the CP before updating this node
4115 _AdjustCandidatePool(self, [node.name])
4117 # this will trigger configuration file update, if needed
4118 self.cfg.Update(node, feedback_fn)
4120 # this will trigger job queue propagation or cleanup
4122 self.context.ReaddNode(node)
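# Illustrative sketch (hypothetical helper, not used by this module):
# LUSetNodeParams above demotes a master candidate as a side effect of
# offlining or draining it; the rule in isolation:
def _ExampleDemotionImplied(is_master_candidate, offline, drained):
  """Return True if offlining/draining this node implies MC demotion."""
  return bool(is_master_candidate and (offline or drained))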
4127 class LUPowercycleNode(NoHooksLU):
4128 """Powercycles a node.
4137 def CheckArguments(self):
4138 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4139 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4140 raise errors.OpPrereqError("The node is the master and the force"
4141 " parameter was not set",
4144 def ExpandNames(self):
4145 """Locking for PowercycleNode.
4147 This is a last-resort option and shouldn't block on other
4148 jobs. Therefore, we grab no locks.
4151 self.needed_locks = {}
4153 def Exec(self, feedback_fn):
4157 result = self.rpc.call_node_powercycle(self.op.node_name,
4158 self.cfg.GetHypervisorType())
4159 result.Raise("Failed to schedule the reboot")
4160 return result.payload
4163 class LUQueryClusterInfo(NoHooksLU):
4164 """Query cluster configuration.
4169 def ExpandNames(self):
4170 self.needed_locks = {}
4172 def Exec(self, feedback_fn):
4173 """Return cluster config.
4176 cluster = self.cfg.GetClusterInfo()
4179 # Filter just for enabled hypervisors
4180 for os_name, hv_dict in cluster.os_hvp.items():
4181 os_hvp[os_name] = {}
4182 for hv_name, hv_params in hv_dict.items():
4183 if hv_name in cluster.enabled_hypervisors:
4184 os_hvp[os_name][hv_name] = hv_params
4187 "software_version": constants.RELEASE_VERSION,
4188 "protocol_version": constants.PROTOCOL_VERSION,
4189 "config_version": constants.CONFIG_VERSION,
4190 "os_api_version": max(constants.OS_API_VERSIONS),
4191 "export_version": constants.EXPORT_VERSION,
4192 "architecture": (platform.architecture()[0], platform.machine()),
4193 "name": cluster.cluster_name,
4194 "master": cluster.master_node,
4195 "default_hypervisor": cluster.enabled_hypervisors[0],
4196 "enabled_hypervisors": cluster.enabled_hypervisors,
4197 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4198 for hypervisor_name in cluster.enabled_hypervisors]),
4200 "beparams": cluster.beparams,
4201 "osparams": cluster.osparams,
4202 "nicparams": cluster.nicparams,
4203 "candidate_pool_size": cluster.candidate_pool_size,
4204 "master_netdev": cluster.master_netdev,
4205 "volume_group_name": cluster.volume_group_name,
4206 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4207 "file_storage_dir": cluster.file_storage_dir,
4208 "maintain_node_health": cluster.maintain_node_health,
4209 "ctime": cluster.ctime,
4210 "mtime": cluster.mtime,
4211 "uuid": cluster.uuid,
4212 "tags": list(cluster.GetTags()),
4213 "uid_pool": cluster.uid_pool,
4214 "default_iallocator": cluster.default_iallocator,
4215 "reserved_lvs": cluster.reserved_lvs,
4221 class LUQueryConfigValues(NoHooksLU):
4222 """Return configuration values.
4225 _OP_PARAMS = [_POutputFields]
4227 _FIELDS_DYNAMIC = utils.FieldSet()
4228 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4229 "watcher_pause", "volume_group_name")
4231 def CheckArguments(self):
4232 _CheckOutputFields(static=self._FIELDS_STATIC,
4233 dynamic=self._FIELDS_DYNAMIC,
4234 selected=self.op.output_fields)
4236 def ExpandNames(self):
4237 self.needed_locks = {}
4239 def Exec(self, feedback_fn):
4240 """Dump a representation of the cluster config to the standard output.
4244 for field in self.op.output_fields:
4245 if field == "cluster_name":
4246 entry = self.cfg.GetClusterName()
4247 elif field == "master_node":
4248 entry = self.cfg.GetMasterNode()
4249 elif field == "drain_flag":
4250 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4251 elif field == "watcher_pause":
4252 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4253 elif field == "volume_group_name":
4254 entry = self.cfg.GetVGName()
4256 raise errors.ParameterError(field)
4257 values.append(entry)
4261 class LUActivateInstanceDisks(NoHooksLU):
4262 """Bring up an instance's disks.
4267 ("ignore_size", False, _TBool),
4271 def ExpandNames(self):
4272 self._ExpandAndLockInstance()
4273 self.needed_locks[locking.LEVEL_NODE] = []
4274 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4276 def DeclareLocks(self, level):
4277 if level == locking.LEVEL_NODE:
4278 self._LockInstancesNodes()
4280 def CheckPrereq(self):
4281 """Check prerequisites.
4283 This checks that the instance is in the cluster.
4286 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4287 assert self.instance is not None, \
4288 "Cannot retrieve locked instance %s" % self.op.instance_name
4289 _CheckNodeOnline(self, self.instance.primary_node)
4291 def Exec(self, feedback_fn):
4292 """Activate the disks.
4295 disks_ok, disks_info = \
4296 _AssembleInstanceDisks(self, self.instance,
4297 ignore_size=self.op.ignore_size)
4299 raise errors.OpExecError("Cannot activate block devices")
4304 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4306 """Prepare the block devices for an instance.
4308 This sets up the block devices on all nodes.
4310 @type lu: L{LogicalUnit}
4311 @param lu: the logical unit on whose behalf we execute
4312 @type instance: L{objects.Instance}
4313 @param instance: the instance for whose disks we assemble
4314 @type disks: list of L{objects.Disk} or None
4315 @param disks: which disks to assemble (or all, if None)
4316 @type ignore_secondaries: boolean
4317 @param ignore_secondaries: if true, errors on secondary nodes
4318 won't result in an error return from the function
4319 @type ignore_size: boolean
4320 @param ignore_size: if true, the current known size of the disk
4321 will not be used during the disk activation, useful for cases
4322 when the size is wrong
4323 @return: False if the operation failed, otherwise a list of
4324 (host, instance_visible_name, node_visible_name)
4325 with the mapping from node devices to instance devices
4330 iname = instance.name
4331 disks = _ExpandCheckDisks(instance, disks)
4333 # With the two-pass mechanism we try to reduce the window of
4334 # opportunity for the race condition of switching DRBD to primary
4335 # before handshaking occurred, but we do not eliminate it
4337 # The proper fix would be to wait (with some limits) until the
4338 # connection has been made and drbd transitions from WFConnection
4339 # into any other network-connected state (Connected, SyncTarget,
4342 # 1st pass, assemble on all nodes in secondary mode
4343 for inst_disk in disks:
4344 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4346 node_disk = node_disk.Copy()
4347 node_disk.UnsetSize()
4348 lu.cfg.SetDiskID(node_disk, node)
4349 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4350 msg = result.fail_msg
4352 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4353 " (is_primary=False, pass=1): %s",
4354 inst_disk.iv_name, node, msg)
4355 if not ignore_secondaries:
4358 # FIXME: race condition on drbd migration to primary
4360 # 2nd pass, do only the primary node
4361 for inst_disk in disks:
4364 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4365 if node != instance.primary_node:
4368 node_disk = node_disk.Copy()
4369 node_disk.UnsetSize()
4370 lu.cfg.SetDiskID(node_disk, node)
4371 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4372 msg = result.fail_msg
4374 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4375 " (is_primary=True, pass=2): %s",
4376 inst_disk.iv_name, node, msg)
4379 dev_path = result.payload
4381 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4383 # leave the disks configured for the primary node
4384 # this is a workaround that would be fixed better by
4385 # improving the logical/physical id handling
4387 lu.cfg.SetDiskID(disk, instance.primary_node)
4389 return disks_ok, device_info
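# Illustrative sketch (hypothetical helper, not used by this module): the
# two-pass scheme above first assembles every node with is_primary=False and
# only then re-assembles the primary with is_primary=True, so DRBD peers
# exist before the primary role is taken.
def _ExampleAssemblyPasses(primary_node, all_nodes):
  """Return the (pass1_nodes, pass2_nodes) implied by the two-pass assembly."""
  return (list(all_nodes), [primary_node])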
4392 def _StartInstanceDisks(lu, instance, force):
4393 """Start the disks of an instance.
4396 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4397 ignore_secondaries=force)
4399 _ShutdownInstanceDisks(lu, instance)
4400 if force is not None and not force:
4401 lu.proc.LogWarning("", hint="If the message above refers to a"
4403 " you can retry the operation using '--force'.")
4404 raise errors.OpExecError("Disk consistency error")
4407 class LUDeactivateInstanceDisks(NoHooksLU):
4408 """Shutdown an instance's disks.
4416 def ExpandNames(self):
4417 self._ExpandAndLockInstance()
4418 self.needed_locks[locking.LEVEL_NODE] = []
4419 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4421 def DeclareLocks(self, level):
4422 if level == locking.LEVEL_NODE:
4423 self._LockInstancesNodes()
4425 def CheckPrereq(self):
4426 """Check prerequisites.
4428 This checks that the instance is in the cluster.
4431 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4432 assert self.instance is not None, \
4433 "Cannot retrieve locked instance %s" % self.op.instance_name
4435 def Exec(self, feedback_fn):
4436 """Deactivate the disks
4439 instance = self.instance
4440 _SafeShutdownInstanceDisks(self, instance)
4443 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4444 """Shutdown block devices of an instance.
4446 This function checks if an instance is running, before calling
4447 _ShutdownInstanceDisks.
4450 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4451 _ShutdownInstanceDisks(lu, instance, disks=disks)
4454 def _ExpandCheckDisks(instance, disks):
4455 """Return the instance disks selected by the disks list
4457 @type disks: list of L{objects.Disk} or None
4458 @param disks: selected disks
4459 @rtype: list of L{objects.Disk}
4460 @return: selected instance disks to act on
4464 return instance.disks
4466 if not set(disks).issubset(instance.disks):
4467 raise errors.ProgrammerError("Can only act on disks belonging to the"
4472 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4473 """Shutdown block devices of an instance.
4475 This does the shutdown on all nodes of the instance.
4477 If ignore_primary is True, errors on the primary node are ignored.
4482 disks = _ExpandCheckDisks(instance, disks)
4485 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4486 lu.cfg.SetDiskID(top_disk, node)
4487 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4488 msg = result.fail_msg
4490 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4491 disk.iv_name, node, msg)
4492 if not ignore_primary or node != instance.primary_node:
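# Illustrative sketch (hypothetical helper, not used by this module): the
# error policy above counts a blockdev shutdown failure as fatal unless
# ignore_primary is set and the failure happened on the primary node.
def _ExampleShutdownFailureCounts(node, primary_node, ignore_primary):
  """Return True if a shutdown failure on 'node' should be reported."""
  return not ignore_primary or node != primary_node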
4497 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4498 """Checks if a node has enough free memory.
4500 This function checks if a given node has the needed amount of free
4501 memory. In case the node has less memory or we cannot get the
4502 information from the node, this function raises an OpPrereqError.
4505 @type lu: C{LogicalUnit}
4506 @param lu: a logical unit from which we get configuration data
4508 @param node: the node to check
4509 @type reason: C{str}
4510 @param reason: string to use in the error message
4511 @type requested: C{int}
4512 @param requested: the amount of memory in MiB to check for
4513 @type hypervisor_name: C{str}
4514 @param hypervisor_name: the hypervisor to ask for memory stats
4515 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4516 we cannot check the node
4519 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4520 nodeinfo[node].Raise("Can't get data from node %s" % node,
4521 prereq=True, ecode=errors.ECODE_ENVIRON)
4522 free_mem = nodeinfo[node].payload.get('memory_free', None)
4523 if not isinstance(free_mem, int):
4524 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4525 " was '%s'" % (node, free_mem),
4526 errors.ECODE_ENVIRON)
4527 if requested > free_mem:
4528 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4529 " needed %s MiB, available %s MiB" %
4530 (node, reason, requested, free_mem),
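# Example call (as done by LUStartupInstance.CheckPrereq further below);
# the requested amount comes from the instance's filled BE parameters:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)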
4534 def _CheckNodesFreeDisk(lu, nodenames, requested):
4535 """Checks if nodes have enough free disk space in the default VG.
4537 This function checks if all given nodes have the needed amount of
4538 free disk. In case any node has less disk or we cannot get the
4539 information from the node, this function raises an OpPrereqError.
4542 @type lu: C{LogicalUnit}
4543 @param lu: a logical unit from which we get configuration data
4544 @type nodenames: C{list}
4545 @param nodenames: the list of node names to check
4546 @type requested: C{int}
4547 @param requested: the amount of disk in MiB to check for
4548 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4549 we cannot check the node
4552 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4553 lu.cfg.GetHypervisorType())
4554 for node in nodenames:
4555 info = nodeinfo[node]
4556 info.Raise("Cannot get current information from node %s" % node,
4557 prereq=True, ecode=errors.ECODE_ENVIRON)
4558 vg_free = info.payload.get("vg_free", None)
4559 if not isinstance(vg_free, int):
4560 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4561 " result was '%s'" % (node, vg_free),
4562 errors.ECODE_ENVIRON)
4563 if requested > vg_free:
4564 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4565 " required %d MiB, available %d MiB" %
4566 (node, requested, vg_free),
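# Illustrative call (hypothetical values): verify that two candidate nodes
# can each hold a 10 GiB allocation in the default volume group:
#   _CheckNodesFreeDisk(self, [pnode, snode], 10 * 1024)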
4570 class LUStartupInstance(LogicalUnit):
4571 """Starts an instance.
4574 HPATH = "instance-start"
4575 HTYPE = constants.HTYPE_INSTANCE
4579 ("hvparams", _EmptyDict, _TDict),
4580 ("beparams", _EmptyDict, _TDict),
4584 def CheckArguments(self):
4586 if self.op.beparams:
4587 # fill the beparams dict
4588 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4590 def ExpandNames(self):
4591 self._ExpandAndLockInstance()
4593 def BuildHooksEnv(self):
4596 This runs on master, primary and secondary nodes of the instance.
4600 "FORCE": self.op.force,
4602 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4603 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4606 def CheckPrereq(self):
4607 """Check prerequisites.
4609 This checks that the instance is in the cluster.
4612 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4613 assert self.instance is not None, \
4614 "Cannot retrieve locked instance %s" % self.op.instance_name
4617 if self.op.hvparams:
4618 # check hypervisor parameter syntax (locally)
4619 cluster = self.cfg.GetClusterInfo()
4620 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4621 filled_hvp = cluster.FillHV(instance)
4622 filled_hvp.update(self.op.hvparams)
4623 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4624 hv_type.CheckParameterSyntax(filled_hvp)
4625 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4627 _CheckNodeOnline(self, instance.primary_node)
4629 bep = self.cfg.GetClusterInfo().FillBE(instance)
4630 # check bridges existence
4631 _CheckInstanceBridgesExist(self, instance)
4633 remote_info = self.rpc.call_instance_info(instance.primary_node,
4635 instance.hypervisor)
4636 remote_info.Raise("Error checking node %s" % instance.primary_node,
4637 prereq=True, ecode=errors.ECODE_ENVIRON)
4638 if not remote_info.payload: # not running already
4639 _CheckNodeFreeMemory(self, instance.primary_node,
4640 "starting instance %s" % instance.name,
4641 bep[constants.BE_MEMORY], instance.hypervisor)
4643 def Exec(self, feedback_fn):
4644 """Start the instance.
4647 instance = self.instance
4648 force = self.op.force
4650 self.cfg.MarkInstanceUp(instance.name)
4652 node_current = instance.primary_node
4654 _StartInstanceDisks(self, instance, force)
4656 result = self.rpc.call_instance_start(node_current, instance,
4657 self.op.hvparams, self.op.beparams)
4658 msg = result.fail_msg
4660 _ShutdownInstanceDisks(self, instance)
4661 raise errors.OpExecError("Could not start instance: %s" % msg)
4664 class LURebootInstance(LogicalUnit):
4665 """Reboot an instance.
4668 HPATH = "instance-reboot"
4669 HTYPE = constants.HTYPE_INSTANCE
4672 ("ignore_secondaries", False, _TBool),
4673 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4678 def ExpandNames(self):
4679 self._ExpandAndLockInstance()
4681 def BuildHooksEnv(self):
4684 This runs on master, primary and secondary nodes of the instance.
4688 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4689 "REBOOT_TYPE": self.op.reboot_type,
4690 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4692 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4693 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4696 def CheckPrereq(self):
4697 """Check prerequisites.
4699 This checks that the instance is in the cluster.
4702 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4703 assert self.instance is not None, \
4704 "Cannot retrieve locked instance %s" % self.op.instance_name
4706 _CheckNodeOnline(self, instance.primary_node)
4708 # check bridges existence
4709 _CheckInstanceBridgesExist(self, instance)
4711 def Exec(self, feedback_fn):
4712 """Reboot the instance.
4715 instance = self.instance
4716 ignore_secondaries = self.op.ignore_secondaries
4717 reboot_type = self.op.reboot_type
4719 node_current = instance.primary_node
4721 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4722 constants.INSTANCE_REBOOT_HARD]:
4723 for disk in instance.disks:
4724 self.cfg.SetDiskID(disk, node_current)
4725 result = self.rpc.call_instance_reboot(node_current, instance,
4727 self.op.shutdown_timeout)
4728 result.Raise("Could not reboot instance")
4730 result = self.rpc.call_instance_shutdown(node_current, instance,
4731 self.op.shutdown_timeout)
4732 result.Raise("Could not shutdown instance for full reboot")
4733 _ShutdownInstanceDisks(self, instance)
4734 _StartInstanceDisks(self, instance, ignore_secondaries)
4735 result = self.rpc.call_instance_start(node_current, instance, None, None)
4736 msg = result.fail_msg
4738 _ShutdownInstanceDisks(self, instance)
4739 raise errors.OpExecError("Could not start instance for"
4740 " full reboot: %s" % msg)
4742 self.cfg.MarkInstanceUp(instance.name)
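# Illustrative sketch (hypothetical helper, not used by this module):
# LURebootInstance.Exec above hands soft and hard reboots to the hypervisor
# and treats any other reboot type as a full stop/start of the instance and
# its disks.
def _ExampleIsHypervisorReboot(reboot_type):
  """Return True for reboot types handled by the hypervisor itself."""
  return reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                         constants.INSTANCE_REBOOT_HARD)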
4745 class LUShutdownInstance(LogicalUnit):
4746 """Shutdown an instance.
4749 HPATH = "instance-stop"
4750 HTYPE = constants.HTYPE_INSTANCE
4753 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4757 def ExpandNames(self):
4758 self._ExpandAndLockInstance()
4760 def BuildHooksEnv(self):
4763 This runs on master, primary and secondary nodes of the instance.
4766 env = _BuildInstanceHookEnvByObject(self, self.instance)
4767 env["TIMEOUT"] = self.op.timeout
4768 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4771 def CheckPrereq(self):
4772 """Check prerequisites.
4774 This checks that the instance is in the cluster.
4777 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4778 assert self.instance is not None, \
4779 "Cannot retrieve locked instance %s" % self.op.instance_name
4780 _CheckNodeOnline(self, self.instance.primary_node)
4782 def Exec(self, feedback_fn):
4783 """Shutdown the instance.
4786 instance = self.instance
4787 node_current = instance.primary_node
4788 timeout = self.op.timeout
4789 self.cfg.MarkInstanceDown(instance.name)
4790 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4791 msg = result.fail_msg
4793 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4795 _ShutdownInstanceDisks(self, instance)
4798 class LUReinstallInstance(LogicalUnit):
4799 """Reinstall an instance.
4802 HPATH = "instance-reinstall"
4803 HTYPE = constants.HTYPE_INSTANCE
4806 ("os_type", None, _TMaybeString),
4807 ("force_variant", False, _TBool),
4811 def ExpandNames(self):
4812 self._ExpandAndLockInstance()
4814 def BuildHooksEnv(self):
4817 This runs on master, primary and secondary nodes of the instance.
4820 env = _BuildInstanceHookEnvByObject(self, self.instance)
4821 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4824 def CheckPrereq(self):
4825 """Check prerequisites.
4827 This checks that the instance is in the cluster and is not running.
4830 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4831 assert instance is not None, \
4832 "Cannot retrieve locked instance %s" % self.op.instance_name
4833 _CheckNodeOnline(self, instance.primary_node)
4835 if instance.disk_template == constants.DT_DISKLESS:
4836 raise errors.OpPrereqError("Instance '%s' has no disks" %
4837 self.op.instance_name,
4839 _CheckInstanceDown(self, instance, "cannot reinstall")
4841 if self.op.os_type is not None:
4843 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4844 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4846 self.instance = instance
4848 def Exec(self, feedback_fn):
4849 """Reinstall the instance.
4852 inst = self.instance
4854 if self.op.os_type is not None:
4855 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4856 inst.os = self.op.os_type
4857 self.cfg.Update(inst, feedback_fn)
4859 _StartInstanceDisks(self, inst, None)
4861 feedback_fn("Running the instance OS create scripts...")
4862 # FIXME: pass debug option from opcode to backend
4863 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4864 self.op.debug_level)
4865 result.Raise("Could not install OS for instance %s on node %s" %
4866 (inst.name, inst.primary_node))
4868 _ShutdownInstanceDisks(self, inst)
4871 class LURecreateInstanceDisks(LogicalUnit):
4872 """Recreate an instance's missing disks.
4875 HPATH = "instance-recreate-disks"
4876 HTYPE = constants.HTYPE_INSTANCE
4879 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4883 def ExpandNames(self):
4884 self._ExpandAndLockInstance()
4886 def BuildHooksEnv(self):
4889 This runs on master, primary and secondary nodes of the instance.
4892 env = _BuildInstanceHookEnvByObject(self, self.instance)
4893 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4896 def CheckPrereq(self):
4897 """Check prerequisites.
4899 This checks that the instance is in the cluster and is not running.
4902 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4903 assert instance is not None, \
4904 "Cannot retrieve locked instance %s" % self.op.instance_name
4905 _CheckNodeOnline(self, instance.primary_node)
4907 if instance.disk_template == constants.DT_DISKLESS:
4908 raise errors.OpPrereqError("Instance '%s' has no disks" %
4909 self.op.instance_name, errors.ECODE_INVAL)
4910 _CheckInstanceDown(self, instance, "cannot recreate disks")
4912 if not self.op.disks:
4913 self.op.disks = range(len(instance.disks))
4915 for idx in self.op.disks:
4916 if idx >= len(instance.disks):
4917 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4920 self.instance = instance
4922 def Exec(self, feedback_fn):
4923 """Recreate the disks.
4927 for idx, _ in enumerate(self.instance.disks):
4928 if idx not in self.op.disks: # disk idx has not been passed in
4932 _CreateDisks(self, self.instance, to_skip=to_skip)
4935 class LURenameInstance(LogicalUnit):
4936 """Rename an instance.
4939 HPATH = "instance-rename"
4940 HTYPE = constants.HTYPE_INSTANCE
4943 ("new_name", _NoDefault, _TNonEmptyString),
4944 ("ip_check", False, _TBool),
4945 ("name_check", True, _TBool),
4948 def CheckArguments(self):
4952 if self.op.ip_check and not self.op.name_check:
4953 # TODO: make the ip check more flexible and not depend on the name check
4954 raise errors.OpPrereqError("Cannot do ip check without a name check",
4957 def BuildHooksEnv(self):
4960 This runs on master, primary and secondary nodes of the instance.
4963 env = _BuildInstanceHookEnvByObject(self, self.instance)
4964 env["INSTANCE_NEW_NAME"] = self.op.new_name
4965 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4968 def CheckPrereq(self):
4969 """Check prerequisites.
4971 This checks that the instance is in the cluster and is not running.
4974 self.op.instance_name = _ExpandInstanceName(self.cfg,
4975 self.op.instance_name)
4976 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4977 assert instance is not None
4978 _CheckNodeOnline(self, instance.primary_node)
4979 _CheckInstanceDown(self, instance, "cannot rename")
4980 self.instance = instance
4982 new_name = self.op.new_name
4983 if self.op.name_check:
4984 hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
4985 new_name = self.op.new_name = hostinfo.name
4986 if (self.op.ip_check and
4987 netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
4988 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4989 (hostinfo.ip, new_name),
4990 errors.ECODE_NOTUNIQUE)
4992 instance_list = self.cfg.GetInstanceList()
4993 if new_name in instance_list:
4994 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4995 new_name, errors.ECODE_EXISTS)
4998 def Exec(self, feedback_fn):
4999 """Reinstall the instance.
5002 inst = self.instance
5003 old_name = inst.name
5005 if inst.disk_template == constants.DT_FILE:
5006 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5008 self.cfg.RenameInstance(inst.name, self.op.new_name)
5009 # Change the instance lock. This is definitely safe while we hold the BGL
5010 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5011 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5013 # re-read the instance from the configuration after rename
5014 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5016 if inst.disk_template == constants.DT_FILE:
5017 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5018 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5019 old_file_storage_dir,
5020 new_file_storage_dir)
5021 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5022 " (but the instance has been renamed in Ganeti)" %
5023 (inst.primary_node, old_file_storage_dir,
5024 new_file_storage_dir))
5026 _StartInstanceDisks(self, inst, None)
5028 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5029 old_name, self.op.debug_level)
5030 msg = result.fail_msg
5032 msg = ("Could not run OS rename script for instance %s on node %s"
5033 " (but the instance has been renamed in Ganeti): %s" %
5034 (inst.name, inst.primary_node, msg))
5035 self.proc.LogWarning(msg)
5037 _ShutdownInstanceDisks(self, inst)
5042 class LURemoveInstance(LogicalUnit):
5043 """Remove an instance.
5046 HPATH = "instance-remove"
5047 HTYPE = constants.HTYPE_INSTANCE
5050 ("ignore_failures", False, _TBool),
5055 def ExpandNames(self):
5056 self._ExpandAndLockInstance()
5057 self.needed_locks[locking.LEVEL_NODE] = []
5058 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5060 def DeclareLocks(self, level):
5061 if level == locking.LEVEL_NODE:
5062 self._LockInstancesNodes()
5064 def BuildHooksEnv(self):
5067 This runs on master, primary and secondary nodes of the instance.
5070 env = _BuildInstanceHookEnvByObject(self, self.instance)
5071 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5072 nl = [self.cfg.GetMasterNode()]
5073 nl_post = list(self.instance.all_nodes) + nl
5074 return env, nl, nl_post
5076 def CheckPrereq(self):
5077 """Check prerequisites.
5079 This checks that the instance is in the cluster.
5082 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5083 assert self.instance is not None, \
5084 "Cannot retrieve locked instance %s" % self.op.instance_name
5086 def Exec(self, feedback_fn):
5087 """Remove the instance.
5090 instance = self.instance
5091 logging.info("Shutting down instance %s on node %s",
5092 instance.name, instance.primary_node)
5094 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5095 self.op.shutdown_timeout)
5096 msg = result.fail_msg
5098 if self.op.ignore_failures:
5099 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5101 raise errors.OpExecError("Could not shutdown instance %s on"
5103 (instance.name, instance.primary_node, msg))
5105 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5108 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5109 """Utility function to remove an instance.
5112 logging.info("Removing block devices for instance %s", instance.name)
5114 if not _RemoveDisks(lu, instance):
5115 if not ignore_failures:
5116 raise errors.OpExecError("Can't remove instance's disks")
5117 feedback_fn("Warning: can't remove instance's disks")
5119 logging.info("Removing instance %s out of cluster config", instance.name)
5121 lu.cfg.RemoveInstance(instance.name)
5123 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5124 "Instance lock removal conflict"
5126 # Remove lock for the instance
5127 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5130 class LUQueryInstances(NoHooksLU):
5131 """Logical unit for querying instances.
5134 # pylint: disable-msg=W0142
5136 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5137 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5138 ("use_locking", False, _TBool),
5141 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5142 "serial_no", "ctime", "mtime", "uuid"]
5143 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5145 "disk_template", "ip", "mac", "bridge",
5146 "nic_mode", "nic_link",
5147 "sda_size", "sdb_size", "vcpus", "tags",
5148 "network_port", "beparams",
5149 r"(disk)\.(size)/([0-9]+)",
5150 r"(disk)\.(sizes)", "disk_usage",
5151 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5152 r"(nic)\.(bridge)/([0-9]+)",
5153 r"(nic)\.(macs|ips|modes|links|bridges)",
5154 r"(disk|nic)\.(count)",
5156 ] + _SIMPLE_FIELDS +
5158 for name in constants.HVS_PARAMETERS
5159 if name not in constants.HVC_GLOBALS] +
5161 for name in constants.BES_PARAMETERS])
5162 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5168 def CheckArguments(self):
5169 _CheckOutputFields(static=self._FIELDS_STATIC,
5170 dynamic=self._FIELDS_DYNAMIC,
5171 selected=self.op.output_fields)
5173 def ExpandNames(self):
5174 self.needed_locks = {}
5175 self.share_locks[locking.LEVEL_INSTANCE] = 1
5176 self.share_locks[locking.LEVEL_NODE] = 1
5179 self.wanted = _GetWantedInstances(self, self.op.names)
5181 self.wanted = locking.ALL_SET
5183 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5184 self.do_locking = self.do_node_query and self.op.use_locking
5186 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5187 self.needed_locks[locking.LEVEL_NODE] = []
5188 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5190 def DeclareLocks(self, level):
5191 if level == locking.LEVEL_NODE and self.do_locking:
5192 self._LockInstancesNodes()
5194 def Exec(self, feedback_fn):
5195 """Computes the list of nodes and their attributes.
5198 # pylint: disable-msg=R0912
5199 # way too many branches here
5200 all_info = self.cfg.GetAllInstancesInfo()
5201 if self.wanted == locking.ALL_SET:
5202 # caller didn't specify instance names, so ordering is not important
5204 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5206 instance_names = all_info.keys()
5207 instance_names = utils.NiceSort(instance_names)
5209 # caller did specify names, so we must keep the ordering
5211 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5213 tgt_set = all_info.keys()
5214 missing = set(self.wanted).difference(tgt_set)
5216 raise errors.OpExecError("Some instances were removed before"
5217 " retrieving their data: %s" % missing)
5218 instance_names = self.wanted
5220 instance_list = [all_info[iname] for iname in instance_names]
5222 # begin data gathering
5224 nodes = frozenset([inst.primary_node for inst in instance_list])
5225 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5229 if self.do_node_query:
5231 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5233 result = node_data[name]
5235 # offline nodes will be in both lists
5236 off_nodes.append(name)
5238 bad_nodes.append(name)
5241 live_data.update(result.payload)
5242 # else no instance is alive
5244 live_data = dict([(name, {}) for name in instance_names])
5246 # end data gathering
5251 cluster = self.cfg.GetClusterInfo()
5252 for instance in instance_list:
5254 i_hv = cluster.FillHV(instance, skip_globals=True)
5255 i_be = cluster.FillBE(instance)
5256 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5257 for field in self.op.output_fields:
5258 st_match = self._FIELDS_STATIC.Matches(field)
5259 if field in self._SIMPLE_FIELDS:
5260 val = getattr(instance, field)
5261 elif field == "pnode":
5262 val = instance.primary_node
5263 elif field == "snodes":
5264 val = list(instance.secondary_nodes)
5265 elif field == "admin_state":
5266 val = instance.admin_up
5267 elif field == "oper_state":
5268 if instance.primary_node in bad_nodes:
5271 val = bool(live_data.get(instance.name))
5272 elif field == "status":
5273 if instance.primary_node in off_nodes:
5274 val = "ERROR_nodeoffline"
5275 elif instance.primary_node in bad_nodes:
5276 val = "ERROR_nodedown"
5278 running = bool(live_data.get(instance.name))
5280 if instance.admin_up:
5285 if instance.admin_up:
5289 elif field == "oper_ram":
5290 if instance.primary_node in bad_nodes:
5292 elif instance.name in live_data:
5293 val = live_data[instance.name].get("memory", "?")
5296 elif field == "oper_vcpus":
5297 if instance.primary_node in bad_nodes:
5299 elif instance.name in live_data:
5300 val = live_data[instance.name].get("vcpus", "?")
5303 elif field == "vcpus":
5304 val = i_be[constants.BE_VCPUS]
5305 elif field == "disk_template":
5306 val = instance.disk_template
5309 val = instance.nics[0].ip
5312 elif field == "nic_mode":
5314 val = i_nicp[0][constants.NIC_MODE]
5317 elif field == "nic_link":
5319 val = i_nicp[0][constants.NIC_LINK]
5322 elif field == "bridge":
5323 if (instance.nics and
5324 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5325 val = i_nicp[0][constants.NIC_LINK]
5328 elif field == "mac":
5330 val = instance.nics[0].mac
5333 elif field == "sda_size" or field == "sdb_size":
5334 idx = ord(field[2]) - ord('a')
5336 val = instance.FindDisk(idx).size
5337 except errors.OpPrereqError:
5339 elif field == "disk_usage": # total disk usage per node
5340 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5341 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5342 elif field == "tags":
5343 val = list(instance.GetTags())
5344 elif field == "hvparams":
5346 elif (field.startswith(HVPREFIX) and
5347 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5348 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5349 val = i_hv.get(field[len(HVPREFIX):], None)
5350 elif field == "beparams":
5352 elif (field.startswith(BEPREFIX) and
5353 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5354 val = i_be.get(field[len(BEPREFIX):], None)
5355 elif st_match and st_match.groups():
5356 # matches a variable list
5357 st_groups = st_match.groups()
5358 if st_groups and st_groups[0] == "disk":
5359 if st_groups[1] == "count":
5360 val = len(instance.disks)
5361 elif st_groups[1] == "sizes":
5362 val = [disk.size for disk in instance.disks]
5363 elif st_groups[1] == "size":
5365 val = instance.FindDisk(st_groups[2]).size
5366 except errors.OpPrereqError:
5369 assert False, "Unhandled disk parameter"
5370 elif st_groups[0] == "nic":
5371 if st_groups[1] == "count":
5372 val = len(instance.nics)
5373 elif st_groups[1] == "macs":
5374 val = [nic.mac for nic in instance.nics]
5375 elif st_groups[1] == "ips":
5376 val = [nic.ip for nic in instance.nics]
5377 elif st_groups[1] == "modes":
5378 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5379 elif st_groups[1] == "links":
5380 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5381 elif st_groups[1] == "bridges":
5384 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5385 val.append(nicp[constants.NIC_LINK])
5390 nic_idx = int(st_groups[2])
5391 if nic_idx >= len(instance.nics):
5394 if st_groups[1] == "mac":
5395 val = instance.nics[nic_idx].mac
5396 elif st_groups[1] == "ip":
5397 val = instance.nics[nic_idx].ip
5398 elif st_groups[1] == "mode":
5399 val = i_nicp[nic_idx][constants.NIC_MODE]
5400 elif st_groups[1] == "link":
5401 val = i_nicp[nic_idx][constants.NIC_LINK]
5402 elif st_groups[1] == "bridge":
5403 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5404 if nic_mode == constants.NIC_MODE_BRIDGED:
5405 val = i_nicp[nic_idx][constants.NIC_LINK]
5409 assert False, "Unhandled NIC parameter"
5411 assert False, ("Declared but unhandled variable parameter '%s'" %
5414 assert False, "Declared but unhandled parameter '%s'" % field
5421 class LUFailoverInstance(LogicalUnit):
5422 """Failover an instance.
5425 HPATH = "instance-failover"
5426 HTYPE = constants.HTYPE_INSTANCE
5429 ("ignore_consistency", False, _TBool),
5434 def ExpandNames(self):
5435 self._ExpandAndLockInstance()
5436 self.needed_locks[locking.LEVEL_NODE] = []
5437 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5439 def DeclareLocks(self, level):
5440 if level == locking.LEVEL_NODE:
5441 self._LockInstancesNodes()
5443 def BuildHooksEnv(self):
5446 This runs on master, primary and secondary nodes of the instance.
5449 instance = self.instance
5450 source_node = instance.primary_node
5451 target_node = instance.secondary_nodes[0]
5453 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5454 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5455 "OLD_PRIMARY": source_node,
5456 "OLD_SECONDARY": target_node,
5457 "NEW_PRIMARY": target_node,
5458 "NEW_SECONDARY": source_node,
5460 env.update(_BuildInstanceHookEnvByObject(self, instance))
5461 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5463 nl_post.append(source_node)
5464 return env, nl, nl_post
5466 def CheckPrereq(self):
5467 """Check prerequisites.
5469 This checks that the instance is in the cluster.
5472 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5473 assert self.instance is not None, \
5474 "Cannot retrieve locked instance %s" % self.op.instance_name
5476 bep = self.cfg.GetClusterInfo().FillBE(instance)
5477 if instance.disk_template not in constants.DTS_NET_MIRROR:
5478 raise errors.OpPrereqError("Instance's disk layout is not"
5479 " network mirrored, cannot failover.",
5482 secondary_nodes = instance.secondary_nodes
5483 if not secondary_nodes:
5484 raise errors.ProgrammerError("no secondary node but using "
5485 "a mirrored disk template")
5487 target_node = secondary_nodes[0]
5488 _CheckNodeOnline(self, target_node)
5489 _CheckNodeNotDrained(self, target_node)
5490 if instance.admin_up:
5491 # check memory requirements on the secondary node
5492 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5493 instance.name, bep[constants.BE_MEMORY],
5494 instance.hypervisor)
5496 self.LogInfo("Not checking memory on the secondary node as"
5497 " instance will not be started")
5499 # check bridge existence
5500 _CheckInstanceBridgesExist(self, instance, node=target_node)
5502 def Exec(self, feedback_fn):
5503 """Failover an instance.
5505 The failover is done by shutting it down on its present node and
5506 starting it on the secondary.
5509 instance = self.instance
5511 source_node = instance.primary_node
5512 target_node = instance.secondary_nodes[0]
5514 if instance.admin_up:
5515 feedback_fn("* checking disk consistency between source and target")
5516 for dev in instance.disks:
5517 # for drbd, these are drbd over lvm
5518 if not _CheckDiskConsistency(self, dev, target_node, False):
5519 if not self.op.ignore_consistency:
5520 raise errors.OpExecError("Disk %s is degraded on target node,"
5521 " aborting failover." % dev.iv_name)
5523 feedback_fn("* not checking disk consistency as instance is not running")
5525 feedback_fn("* shutting down instance on source node")
5526 logging.info("Shutting down instance %s on node %s",
5527 instance.name, source_node)
5529 result = self.rpc.call_instance_shutdown(source_node, instance,
5530 self.op.shutdown_timeout)
5531 msg = result.fail_msg
5533 if self.op.ignore_consistency:
5534 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5535 " Proceeding anyway. Please make sure node"
5536 " %s is down. Error details: %s",
5537 instance.name, source_node, source_node, msg)
5539 raise errors.OpExecError("Could not shutdown instance %s on"
5541 (instance.name, source_node, msg))
5543 feedback_fn("* deactivating the instance's disks on source node")
5544 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5545 raise errors.OpExecError("Can't shut down the instance's disks.")
5547 instance.primary_node = target_node
5548 # distribute new instance config to the other nodes
5549 self.cfg.Update(instance, feedback_fn)
5551 # Only start the instance if it's marked as up
5552 if instance.admin_up:
5553 feedback_fn("* activating the instance's disks on target node")
5554 logging.info("Starting instance %s on node %s",
5555 instance.name, target_node)
5557 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5558 ignore_secondaries=True)
5560 _ShutdownInstanceDisks(self, instance)
5561 raise errors.OpExecError("Can't activate the instance's disks")
5563 feedback_fn("* starting the instance on the target node")
5564 result = self.rpc.call_instance_start(target_node, instance, None, None)
5565 msg = result.fail_msg
5567 _ShutdownInstanceDisks(self, instance)
5568 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5569 (instance.name, target_node, msg))
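# Outline of the failover sequence implemented by Exec above:
#   1. if the instance is up, check disk consistency on the secondary
#      (unless ignore_consistency is set)
#   2. shut the instance down on the current primary
#   3. shut down its disks, ignoring errors on the old primary
#   4. switch primary_node in the configuration and distribute it
#   5. if the instance was marked up, re-assemble the disks and start it
#      on the new primary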
5572 class LUMigrateInstance(LogicalUnit):
5573 """Migrate an instance.
5575 This is migration without shutting the instance down, as opposed to
5576 failover, which requires a shutdown.
5579 HPATH = "instance-migrate"
5580 HTYPE = constants.HTYPE_INSTANCE
5585 ("cleanup", False, _TBool),
5590 def ExpandNames(self):
5591 self._ExpandAndLockInstance()
5593 self.needed_locks[locking.LEVEL_NODE] = []
5594 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5596 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5598 self.tasklets = [self._migrater]
5600 def DeclareLocks(self, level):
5601 if level == locking.LEVEL_NODE:
5602 self._LockInstancesNodes()
5604 def BuildHooksEnv(self):
5607 This runs on master, primary and secondary nodes of the instance.
5610 instance = self._migrater.instance
5611 source_node = instance.primary_node
5612 target_node = instance.secondary_nodes[0]
5613 env = _BuildInstanceHookEnvByObject(self, instance)
5614 env["MIGRATE_LIVE"] = self._migrater.live
5615 env["MIGRATE_CLEANUP"] = self.op.cleanup
5617 "OLD_PRIMARY": source_node,
5618 "OLD_SECONDARY": target_node,
5619 "NEW_PRIMARY": target_node,
5620 "NEW_SECONDARY": source_node,
5622 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5624 nl_post.append(source_node)
5625 return env, nl, nl_post
5628 class LUMoveInstance(LogicalUnit):
5629 """Move an instance by data-copying.
5632 HPATH = "instance-move"
5633 HTYPE = constants.HTYPE_INSTANCE
5636 ("target_node", _NoDefault, _TNonEmptyString),
5641 def ExpandNames(self):
5642 self._ExpandAndLockInstance()
5643 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5644 self.op.target_node = target_node
5645 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5646 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5648 def DeclareLocks(self, level):
5649 if level == locking.LEVEL_NODE:
5650 self._LockInstancesNodes(primary_only=True)
5652 def BuildHooksEnv(self):
5655 This runs on master, primary and secondary nodes of the instance.
5659 "TARGET_NODE": self.op.target_node,
5660 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5662 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5663 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5664 self.op.target_node]
5667 def CheckPrereq(self):
5668 """Check prerequisites.
5670 This checks that the instance is in the cluster.
5673 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5674 assert self.instance is not None, \
5675 "Cannot retrieve locked instance %s" % self.op.instance_name
5677 node = self.cfg.GetNodeInfo(self.op.target_node)
5678 assert node is not None, \
5679 "Cannot retrieve locked node %s" % self.op.target_node
5681 self.target_node = target_node = node.name
5683 if target_node == instance.primary_node:
5684 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5685 (instance.name, target_node),
5688 bep = self.cfg.GetClusterInfo().FillBE(instance)
5690 for idx, dsk in enumerate(instance.disks):
5691 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5692 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5693 " cannot copy" % idx, errors.ECODE_STATE)
5695 _CheckNodeOnline(self, target_node)
5696 _CheckNodeNotDrained(self, target_node)
5698 if instance.admin_up:
5699 # check memory requirements on the target node
5700 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5701 instance.name, bep[constants.BE_MEMORY],
5702 instance.hypervisor)
5704 self.LogInfo("Not checking memory on the secondary node as"
5705 " instance will not be started")
5707 # check bridge existence
5708 _CheckInstanceBridgesExist(self, instance, node=target_node)
5710 def Exec(self, feedback_fn):
5711 """Move an instance.
5713 The move is done by shutting it down on its present node, copying
5714 the data over (slow) and starting it on the new node.
5717 instance = self.instance
5719 source_node = instance.primary_node
5720 target_node = self.target_node
5722 self.LogInfo("Shutting down instance %s on source node %s",
5723 instance.name, source_node)
5725 result = self.rpc.call_instance_shutdown(source_node, instance,
5726 self.op.shutdown_timeout)
5727 msg = result.fail_msg
5729 if self.op.ignore_consistency:
5730 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5731 " Proceeding anyway. Please make sure node"
5732 " %s is down. Error details: %s",
5733 instance.name, source_node, source_node, msg)
5735 raise errors.OpExecError("Could not shutdown instance %s on"
5737 (instance.name, source_node, msg))
5739 # create the target disks
5741 _CreateDisks(self, instance, target_node=target_node)
5742 except errors.OpExecError:
5743 self.LogWarning("Device creation failed, reverting...")
5745 _RemoveDisks(self, instance, target_node=target_node)
5747 self.cfg.ReleaseDRBDMinors(instance.name)
5750 cluster_name = self.cfg.GetClusterInfo().cluster_name
5753 # activate, get path, copy the data over
5754 for idx, disk in enumerate(instance.disks):
5755 self.LogInfo("Copying data for disk %d", idx)
5756 result = self.rpc.call_blockdev_assemble(target_node, disk,
5757 instance.name, True)
5759 self.LogWarning("Can't assemble newly created disk %d: %s",
5760 idx, result.fail_msg)
5761 errs.append(result.fail_msg)
5763 dev_path = result.payload
5764 result = self.rpc.call_blockdev_export(source_node, disk,
5765 target_node, dev_path,
5768 self.LogWarning("Can't copy data over for disk %d: %s",
5769 idx, result.fail_msg)
5770 errs.append(result.fail_msg)
5774 self.LogWarning("Some disks failed to copy, aborting")
5776 _RemoveDisks(self, instance, target_node=target_node)
5778 self.cfg.ReleaseDRBDMinors(instance.name)
5779 raise errors.OpExecError("Errors during disk copy: %s" %
5782 instance.primary_node = target_node
5783 self.cfg.Update(instance, feedback_fn)
5785 self.LogInfo("Removing the disks on the original node")
5786 _RemoveDisks(self, instance, target_node=source_node)
5788 # Only start the instance if it's marked as up
5789 if instance.admin_up:
5790 self.LogInfo("Starting instance %s on node %s",
5791 instance.name, target_node)
5793 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5794 ignore_secondaries=True)
5796 _ShutdownInstanceDisks(self, instance)
5797 raise errors.OpExecError("Can't activate the instance's disks")
5799 result = self.rpc.call_instance_start(target_node, instance, None, None)
5800 msg = result.fail_msg
5802 _ShutdownInstanceDisks(self, instance)
5803 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5804 (instance.name, target_node, msg))
5807 class LUMigrateNode(LogicalUnit):
5808 """Migrate all instances from a node.
5811 HPATH = "node-migrate"
5812 HTYPE = constants.HTYPE_NODE
5820 def ExpandNames(self):
5821 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5823 self.needed_locks = {
5824 locking.LEVEL_NODE: [self.op.node_name],
5827 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5829 # Create tasklets for migrating all primary instances on this node
5833 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5834 logging.debug("Migrating instance %s", inst.name)
5835 names.append(inst.name)
5837 tasklets.append(TLMigrateInstance(self, inst.name, False))
5839 self.tasklets = tasklets
5841 # Declare instance locks
5842 self.needed_locks[locking.LEVEL_INSTANCE] = names
5844 def DeclareLocks(self, level):
5845 if level == locking.LEVEL_NODE:
5846 self._LockInstancesNodes()
5848 def BuildHooksEnv(self):
5851 This runs on the master, the primary and all the secondaries.
5855 "NODE_NAME": self.op.node_name,
5858 nl = [self.cfg.GetMasterNode()]
5860 return (env, nl, nl)
5863 class TLMigrateInstance(Tasklet):
5864 """Tasklet class for instance migration.
5867 @ivar live: whether the migration will be done live or non-live;
5868 this variable is initialized only after CheckPrereq has run
5871 def __init__(self, lu, instance_name, cleanup):
5872 """Initializes this class.
5875 Tasklet.__init__(self, lu)
5878 self.instance_name = instance_name
5879 self.cleanup = cleanup
5880 self.live = False # will be overridden later
5882 def CheckPrereq(self):
5883 """Check prerequisites.
5885 This checks that the instance is in the cluster.
5888 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5889 instance = self.cfg.GetInstanceInfo(instance_name)
5890 assert instance is not None
5892 if instance.disk_template != constants.DT_DRBD8:
5893 raise errors.OpPrereqError("Instance's disk layout is not"
5894 " drbd8, cannot migrate.", errors.ECODE_STATE)
5896 secondary_nodes = instance.secondary_nodes
5897 if not secondary_nodes:
5898 raise errors.ConfigurationError("No secondary node but using"
5899 " drbd8 disk template")
5901 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5903 target_node = secondary_nodes[0]
5904 # check memory requirements on the secondary node
5905 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5906 instance.name, i_be[constants.BE_MEMORY],
5907 instance.hypervisor)
5909 # check bridge existence
5910 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5912 if not self.cleanup:
5913 _CheckNodeNotDrained(self.lu, target_node)
5914 result = self.rpc.call_instance_migratable(instance.primary_node,
5916 result.Raise("Can't migrate, please use failover",
5917 prereq=True, ecode=errors.ECODE_STATE)
5919 self.instance = instance
5921 if self.lu.op.live is not None and self.lu.op.mode is not None:
5922 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5923 " parameters are accepted",
5925 if self.lu.op.live is not None:
5927 self.lu.op.mode = constants.HT_MIGRATION_LIVE
5929 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5930 # reset the 'live' parameter to None so that repeated
5931 # invocations of CheckPrereq do not raise an exception
5932 self.lu.op.live = None
5933 elif self.lu.op.mode is None:
5934 # read the default value from the hypervisor
5935 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5936 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5938 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
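# Editorial summary of the mode resolution above: an explicit op.live=True
# maps to constants.HT_MIGRATION_LIVE, op.live=False to
# constants.HT_MIGRATION_NONLIVE, and when neither 'live' nor 'mode' is given
# the hypervisor's HV_MIGRATION_MODE default is used; self.live then simply
# reflects whether the resolved mode equals HT_MIGRATION_LIVE.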
5940 def _WaitUntilSync(self):
5941 """Poll with custom rpc for disk sync.
5943 This uses our own step-based rpc call.
5946 self.feedback_fn("* wait until resync is done")
5950 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5952 self.instance.disks)
5954 for node, nres in result.items():
5955 nres.Raise("Cannot resync disks on node %s" % node)
5956 node_done, node_percent = nres.payload
5957 all_done = all_done and node_done
5958 if node_percent is not None:
5959 min_percent = min(min_percent, node_percent)
5961 if min_percent < 100:
5962 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5965 def _EnsureSecondary(self, node):
5966 """Demote a node to secondary.
5969 self.feedback_fn("* switching node %s to secondary mode" % node)
5971 for dev in self.instance.disks:
5972 self.cfg.SetDiskID(dev, node)
5974 result = self.rpc.call_blockdev_close(node, self.instance.name,
5975 self.instance.disks)
5976 result.Raise("Cannot change disk to secondary on node %s" % node)
5978 def _GoStandalone(self):
5979 """Disconnect from the network.
5982 self.feedback_fn("* changing into standalone mode")
5983 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5984 self.instance.disks)
5985 for node, nres in result.items():
5986 nres.Raise("Cannot disconnect disks on node %s" % node)
5988 def _GoReconnect(self, multimaster):
5989 """Reconnect to the network.
5995 msg = "single-master"
5996 self.feedback_fn("* changing disks into %s mode" % msg)
5997 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5998 self.instance.disks,
5999 self.instance.name, multimaster)
6000 for node, nres in result.items():
6001 nres.Raise("Cannot change disks config on node %s" % node)
6003 def _ExecCleanup(self):
6004 """Try to cleanup after a failed migration.
6006 The cleanup is done by:
6007 - check that the instance is running only on one node
6008 (and update the config if needed)
6009 - change disks on its secondary node to secondary
6010 - wait until disks are fully synchronized
6011 - disconnect from the network
6012 - change disks into single-master mode
6013 - wait again until disks are fully synchronized
6016 instance = self.instance
6017 target_node = self.target_node
6018 source_node = self.source_node
6020 # check running on only one node
6021 self.feedback_fn("* checking where the instance actually runs"
6022 " (if this hangs, the hypervisor might be in"
6024 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6025 for node, result in ins_l.items():
6026 result.Raise("Can't contact node %s" % node)
6028 runningon_source = instance.name in ins_l[source_node].payload
6029 runningon_target = instance.name in ins_l[target_node].payload
6031 if runningon_source and runningon_target:
6032 raise errors.OpExecError("Instance seems to be running on two nodes,"
6033 " or the hypervisor is confused. You will have"
6034 " to ensure manually that it runs only on one"
6035 " and restart this operation.")
6037 if not (runningon_source or runningon_target):
6038 raise errors.OpExecError("Instance does not seem to be running at all."
6039 " In this case, it's safer to repair by"
6040 " running 'gnt-instance stop' to ensure disk"
6041 " shutdown, and then restarting it.")
6043 if runningon_target:
6044 # the migration has actually succeeded, we need to update the config
6045 self.feedback_fn("* instance running on secondary node (%s),"
6046 " updating config" % target_node)
6047 instance.primary_node = target_node
6048 self.cfg.Update(instance, self.feedback_fn)
6049 demoted_node = source_node
6051 self.feedback_fn("* instance confirmed to be running on its"
6052 " primary node (%s)" % source_node)
6053 demoted_node = target_node
6055 self._EnsureSecondary(demoted_node)
6057 self._WaitUntilSync()
6058 except errors.OpExecError:
6059 # we ignore errors here, since if the device is standalone, it
6060 # won't be able to sync
6062 self._GoStandalone()
6063 self._GoReconnect(False)
6064 self._WaitUntilSync()
6066 self.feedback_fn("* done")
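# Editorial summary of the cleanup decision above (no new logic):
#   runningon_source  runningon_target  action taken by _ExecCleanup
#   True              True              abort: instance seems to run twice
#   False             False             abort: instance not running at all
#   True              False             keep primary, demote target_node
#   False              True              switch primary_node, demote source_node
# After demotion the disks are put into standalone mode, reconnected in
# single-master mode and the tasklet waits until they are in sync again.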
6068 def _RevertDiskStatus(self):
6069 """Try to revert the disk status after a failed migration.
6072 target_node = self.target_node
6074 self._EnsureSecondary(target_node)
6075 self._GoStandalone()
6076 self._GoReconnect(False)
6077 self._WaitUntilSync()
6078 except errors.OpExecError, err:
6079 self.lu.LogWarning("Migration failed and I can't reconnect the"
6080 " drives: error '%s'\n"
6081 "Please look and recover the instance status" %
6084 def _AbortMigration(self):
6085 """Call the hypervisor code to abort a started migration.
6088 instance = self.instance
6089 target_node = self.target_node
6090 migration_info = self.migration_info
6092 abort_result = self.rpc.call_finalize_migration(target_node,
6096 abort_msg = abort_result.fail_msg
6098 logging.error("Aborting migration failed on target node %s: %s",
6099 target_node, abort_msg)
6100 # Don't raise an exception here, as we still have to try to revert the
6101 # disk status, even if this step failed.
6103 def _ExecMigration(self):
6104 """Migrate an instance.
6106 The migrate is done by:
6107 - change the disks into dual-master mode
6108 - wait until disks are fully synchronized again
6109 - migrate the instance
6110 - change disks on the new secondary node (the old primary) to secondary
6111 - wait until disks are fully synchronized
6112 - change disks into single-master mode
6115 instance = self.instance
6116 target_node = self.target_node
6117 source_node = self.source_node
6119 self.feedback_fn("* checking disk consistency between source and target")
6120 for dev in instance.disks:
6121 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6122 raise errors.OpExecError("Disk %s is degraded or not fully"
6123 " synchronized on target node,"
6124 " aborting migrate." % dev.iv_name)
6126 # First get the migration information from the remote node
6127 result = self.rpc.call_migration_info(source_node, instance)
6128 msg = result.fail_msg
6130 log_err = ("Failed fetching source migration information from %s: %s" %
6132 logging.error(log_err)
6133 raise errors.OpExecError(log_err)
6135 self.migration_info = migration_info = result.payload
6137 # Then switch the disks to master/master mode
6138 self._EnsureSecondary(target_node)
6139 self._GoStandalone()
6140 self._GoReconnect(True)
6141 self._WaitUntilSync()
6143 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6144 result = self.rpc.call_accept_instance(target_node,
6147 self.nodes_ip[target_node])
6149 msg = result.fail_msg
6151 logging.error("Instance pre-migration failed, trying to revert"
6152 " disk status: %s", msg)
6153 self.feedback_fn("Pre-migration failed, aborting")
6154 self._AbortMigration()
6155 self._RevertDiskStatus()
6156 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6157 (instance.name, msg))
6159 self.feedback_fn("* migrating instance to %s" % target_node)
6161 result = self.rpc.call_instance_migrate(source_node, instance,
6162 self.nodes_ip[target_node],
6164 msg = result.fail_msg
6166 logging.error("Instance migration failed, trying to revert"
6167 " disk status: %s", msg)
6168 self.feedback_fn("Migration failed, aborting")
6169 self._AbortMigration()
6170 self._RevertDiskStatus()
6171 raise errors.OpExecError("Could not migrate instance %s: %s" %
6172 (instance.name, msg))
6175 instance.primary_node = target_node
6176 # distribute new instance config to the other nodes
6177 self.cfg.Update(instance, self.feedback_fn)
6179 result = self.rpc.call_finalize_migration(target_node,
6183 msg = result.fail_msg
6185 logging.error("Instance migration succeeded, but finalization failed:"
6187 raise errors.OpExecError("Could not finalize instance migration: %s" %
6190 self._EnsureSecondary(source_node)
6191 self._WaitUntilSync()
6192 self._GoStandalone()
6193 self._GoReconnect(False)
6194 self._WaitUntilSync()
6196 self.feedback_fn("* done")
6198 def Exec(self, feedback_fn):
6199 """Perform the migration.
6202 feedback_fn("Migrating instance %s" % self.instance.name)
6204 self.feedback_fn = feedback_fn
6206 self.source_node = self.instance.primary_node
6207 self.target_node = self.instance.secondary_nodes[0]
6208 self.all_nodes = [self.source_node, self.target_node]
6210 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6211 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6215 return self._ExecCleanup()
6217 return self._ExecMigration()
6220 def _CreateBlockDev(lu, node, instance, device, force_create,
6222 """Create a tree of block devices on a given node.
6224 If this device type has to be created on secondaries, create it and all its children.
6227 If not, just recurse to children keeping the same 'force' value.
6229 @param lu: the lu on whose behalf we execute
6230 @param node: the node on which to create the device
6231 @type instance: L{objects.Instance}
6232 @param instance: the instance which owns the device
6233 @type device: L{objects.Disk}
6234 @param device: the device to create
6235 @type force_create: boolean
6236 @param force_create: whether to force creation of this device; this
6237 will be changed to True whenever we find a device which has
6238 the CreateOnSecondary() attribute
6239 @param info: the extra 'metadata' we should attach to the device
6240 (this will be represented as a LVM tag)
6241 @type force_open: boolean
6242 @param force_open: this parameter will be passed to the
6243 L{backend.BlockdevCreate} function where it specifies
6244 whether we run on primary or not, and it affects both
6245 the child assembly and the device's own Open() execution
6248 if device.CreateOnSecondary():
6252 for child in device.children:
6253 _CreateBlockDev(lu, node, instance, child, force_create,
6256 if not force_create:
6259 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6262 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6263 """Create a single block device on a given node.
6265 This will not recurse over children of the device, so they must be created in advance.
6268 @param lu: the lu on whose behalf we execute
6269 @param node: the node on which to create the device
6270 @type instance: L{objects.Instance}
6271 @param instance: the instance which owns the device
6272 @type device: L{objects.Disk}
6273 @param device: the device to create
6274 @param info: the extra 'metadata' we should attach to the device
6275 (this will be represented as a LVM tag)
6276 @type force_open: boolean
6277 @param force_open: this parameter will be passed to the
6278 L{backend.BlockdevCreate} function where it specifies
6279 whether we run on primary or not, and it affects both
6280 the child assembly and the device's own Open() execution
6283 lu.cfg.SetDiskID(device, node)
6284 result = lu.rpc.call_blockdev_create(node, device, device.size,
6285 instance.name, force_open, info)
6286 result.Raise("Can't create block device %s on"
6287 " node %s for instance %s" % (device, node, instance.name))
6288 if device.physical_id is None:
6289 device.physical_id = result.payload
6292 def _GenerateUniqueNames(lu, exts):
6293 """Generate a suitable LV name.
6295 This will generate a logical volume name for the given instance.
6300 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6301 results.append("%s%s" % (new_id, val))
6305 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6307 """Generate a drbd8 device complete with its children.
6310 port = lu.cfg.AllocatePort()
6311 vgname = lu.cfg.GetVGName()
6312 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6313 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6314 logical_id=(vgname, names[0]))
6315 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6316 logical_id=(vgname, names[1]))
6317 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6318 logical_id=(primary, secondary, port,
6321 children=[dev_data, dev_meta],
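# Illustrative sketch (editorial, partly assumed since the tail of the
# function is not shown here): for size=1024 and names=["<uuid>.disk0_data",
# "<uuid>.disk0_meta"], _GenerateDRBD8Branch builds a DRBD8 disk whose
# children are a 1024 MB data LV and a fixed 128 MB metadata LV; its
# logical_id starts with (primary, secondary, port, ...) and is assumed to
# also carry the two allocated DRBD minors and the generated shared secret.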
6326 def _GenerateDiskTemplate(lu, template_name,
6327 instance_name, primary_node,
6328 secondary_nodes, disk_info,
6329 file_storage_dir, file_driver,
6331 """Generate the entire disk layout for a given template type.
6334 # TODO: compute space requirements
6336 vgname = lu.cfg.GetVGName()
6337 disk_count = len(disk_info)
6339 if template_name == constants.DT_DISKLESS:
6341 elif template_name == constants.DT_PLAIN:
6342 if len(secondary_nodes) != 0:
6343 raise errors.ProgrammerError("Wrong template configuration")
6345 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6346 for i in range(disk_count)])
6347 for idx, disk in enumerate(disk_info):
6348 disk_index = idx + base_index
6349 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6350 logical_id=(vgname, names[idx]),
6351 iv_name="disk/%d" % disk_index,
6353 disks.append(disk_dev)
6354 elif template_name == constants.DT_DRBD8:
6355 if len(secondary_nodes) != 1:
6356 raise errors.ProgrammerError("Wrong template configuration")
6357 remote_node = secondary_nodes[0]
6358 minors = lu.cfg.AllocateDRBDMinor(
6359 [primary_node, remote_node] * len(disk_info), instance_name)
6362 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6363 for i in range(disk_count)]):
6364 names.append(lv_prefix + "_data")
6365 names.append(lv_prefix + "_meta")
6366 for idx, disk in enumerate(disk_info):
6367 disk_index = idx + base_index
6368 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6369 disk["size"], names[idx*2:idx*2+2],
6370 "disk/%d" % disk_index,
6371 minors[idx*2], minors[idx*2+1])
6372 disk_dev.mode = disk["mode"]
6373 disks.append(disk_dev)
6374 elif template_name == constants.DT_FILE:
6375 if len(secondary_nodes) != 0:
6376 raise errors.ProgrammerError("Wrong template configuration")
6378 _RequireFileStorage()
6380 for idx, disk in enumerate(disk_info):
6381 disk_index = idx + base_index
6382 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6383 iv_name="disk/%d" % disk_index,
6384 logical_id=(file_driver,
6385 "%s/disk%d" % (file_storage_dir,
6388 disks.append(disk_dev)
6390 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
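# Usage sketch for _GenerateDiskTemplate (editorial, mirroring the call made
# later from LUCreateInstance.Exec): with the plain template, two disks and
# base_index=0, the LVs get unique names ending in ".disk0" and ".disk1" and
# the resulting Disk objects are exposed to the instance as "disk/0" and
# "disk/1"; drbd8 additionally allocates a minor pair per disk and splits each
# name into a "_data" and a "_meta" volume.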
6394 def _GetInstanceInfoText(instance):
6395 """Compute that text that should be added to the disk's metadata.
6398 return "originstname+%s" % instance.name
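# Example (editorial): for an instance named "inst1.example.com" the text
# attached to its disks, stored as an LVM tag, is
# "originstname+inst1.example.com".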
6401 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6402 """Create all disks for an instance.
6404 This abstracts away some work from AddInstance.
6406 @type lu: L{LogicalUnit}
6407 @param lu: the logical unit on whose behalf we execute
6408 @type instance: L{objects.Instance}
6409 @param instance: the instance whose disks we should create
6411 @param to_skip: list of indices to skip
6412 @type target_node: string
6413 @param target_node: if passed, overrides the target node for creation
6415 @return: the success of the creation
6418 info = _GetInstanceInfoText(instance)
6419 if target_node is None:
6420 pnode = instance.primary_node
6421 all_nodes = instance.all_nodes
6426 if instance.disk_template == constants.DT_FILE:
6427 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6428 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6430 result.Raise("Failed to create directory '%s' on"
6431 " node %s" % (file_storage_dir, pnode))
6433 # Note: this needs to be kept in sync with adding of disks in
6434 # LUSetInstanceParams
6435 for idx, device in enumerate(instance.disks):
6436 if to_skip and idx in to_skip:
6438 logging.info("Creating volume %s for instance %s",
6439 device.iv_name, instance.name)
6441 for node in all_nodes:
6442 f_create = node == pnode
6443 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
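# Editorial note: in the loop above f_create is True only on the primary node,
# so creation on secondaries is forced solely for device types whose
# CreateOnSecondary() says so (see _CreateBlockDev). A typical call, as used
# later by LUCreateInstance.Exec, is simply:
#   _CreateDisks(self, iobj)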
6446 def _RemoveDisks(lu, instance, target_node=None):
6447 """Remove all disks for an instance.
6449 This abstracts away some work from `AddInstance()` and
6450 `RemoveInstance()`. Note that in case some of the devices couldn't
6451 be removed, the removal will continue with the other ones (compare
6452 with `_CreateDisks()`).
6454 @type lu: L{LogicalUnit}
6455 @param lu: the logical unit on whose behalf we execute
6456 @type instance: L{objects.Instance}
6457 @param instance: the instance whose disks we should remove
6458 @type target_node: string
6459 @param target_node: used to override the node on which to remove the disks
6461 @return: the success of the removal
6464 logging.info("Removing block devices for instance %s", instance.name)
6467 for device in instance.disks:
6469 edata = [(target_node, device)]
6471 edata = device.ComputeNodeTree(instance.primary_node)
6472 for node, disk in edata:
6473 lu.cfg.SetDiskID(disk, node)
6474 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6476 lu.LogWarning("Could not remove block device %s on node %s,"
6477 " continuing anyway: %s", device.iv_name, node, msg)
6480 if instance.disk_template == constants.DT_FILE:
6481 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6485 tgt = instance.primary_node
6486 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6488 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6489 file_storage_dir, instance.primary_node, result.fail_msg)
6495 def _ComputeDiskSize(disk_template, disks):
6496 """Compute disk size requirements in the volume group
6499 # Required free disk space as a function of disk and swap space
6501 constants.DT_DISKLESS: None,
6502 constants.DT_PLAIN: sum(d["size"] for d in disks),
6503 # 128 MB are added for drbd metadata for each disk
6504 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6505 constants.DT_FILE: None,
6508 if disk_template not in req_size_dict:
6509 raise errors.ProgrammerError("Disk template '%s' size requirement"
6510 " is unknown" % disk_template)
6512 return req_size_dict[disk_template]
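# Worked example for _ComputeDiskSize (editorial): for two disks of 1024 MB
# each, the plain template needs 1024 + 1024 = 2048 MB of volume group space,
# drbd8 needs (1024 + 128) * 2 = 2304 MB because of the per-disk DRBD
# metadata, and the diskless/file templates need none (None is returned).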
6515 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6516 """Hypervisor parameter validation.
6518 This function abstracts the hypervisor parameter validation to be
6519 used in both instance create and instance modify.
6521 @type lu: L{LogicalUnit}
6522 @param lu: the logical unit for which we check
6523 @type nodenames: list
6524 @param nodenames: the list of nodes on which we should check
6525 @type hvname: string
6526 @param hvname: the name of the hypervisor we should use
6527 @type hvparams: dict
6528 @param hvparams: the parameters which we need to check
6529 @raise errors.OpPrereqError: if the parameters are not valid
6532 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6535 for node in nodenames:
6539 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6542 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6543 """OS parameters validation.
6545 @type lu: L{LogicalUnit}
6546 @param lu: the logical unit for which we check
6547 @type required: boolean
6548 @param required: whether the validation should fail if the OS is not found
6550 @type nodenames: list
6551 @param nodenames: the list of nodes on which we should check
6552 @type osname: string
6553 @param osname: the name of the OS we should use
6554 @type osparams: dict
6555 @param osparams: the parameters which we need to check
6556 @raise errors.OpPrereqError: if the parameters are not valid
6559 result = lu.rpc.call_os_validate(required, nodenames, osname,
6560 [constants.OS_VALIDATE_PARAMETERS],
6562 for node, nres in result.items():
6563 # we don't check for offline cases since this should be run only
6564 # against the master node and/or an instance's nodes
6565 nres.Raise("OS Parameters validation failed on node %s" % node)
6566 if not nres.payload:
6567 lu.LogInfo("OS %s not found on node %s, validation skipped",
6571 class LUCreateInstance(LogicalUnit):
6572 """Create an instance.
6575 HPATH = "instance-add"
6576 HTYPE = constants.HTYPE_INSTANCE
6579 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6580 ("start", True, _TBool),
6581 ("wait_for_sync", True, _TBool),
6582 ("ip_check", True, _TBool),
6583 ("name_check", True, _TBool),
6584 ("disks", _NoDefault, _TListOf(_TDict)),
6585 ("nics", _NoDefault, _TListOf(_TDict)),
6586 ("hvparams", _EmptyDict, _TDict),
6587 ("beparams", _EmptyDict, _TDict),
6588 ("osparams", _EmptyDict, _TDict),
6589 ("no_install", None, _TMaybeBool),
6590 ("os_type", None, _TMaybeString),
6591 ("force_variant", False, _TBool),
6592 ("source_handshake", None, _TOr(_TList, _TNone)),
6593 ("source_x509_ca", None, _TMaybeString),
6594 ("source_instance_name", None, _TMaybeString),
6595 ("src_node", None, _TMaybeString),
6596 ("src_path", None, _TMaybeString),
6597 ("pnode", None, _TMaybeString),
6598 ("snode", None, _TMaybeString),
6599 ("iallocator", None, _TMaybeString),
6600 ("hypervisor", None, _TMaybeString),
6601 ("disk_template", _NoDefault, _CheckDiskTemplate),
6602 ("identify_defaults", False, _TBool),
6603 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6604 ("file_storage_dir", None, _TMaybeString),
6608 def CheckArguments(self):
6612 # do not require name_check to ease forward/backward compatibility
6614 if self.op.no_install and self.op.start:
6615 self.LogInfo("No-installation mode selected, disabling startup")
6616 self.op.start = False
6617 # validate/normalize the instance name
6618 self.op.instance_name = \
6619 netutils.HostInfo.NormalizeName(self.op.instance_name)
6621 if self.op.ip_check and not self.op.name_check:
6622 # TODO: make the ip check more flexible and not depend on the name check
6623 raise errors.OpPrereqError("Cannot do ip check without a name check",
6626 # check nics' parameter names
6627 for nic in self.op.nics:
6628 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6630 # check disks: parameter names and consistent adopt/no-adopt strategy
6631 has_adopt = has_no_adopt = False
6632 for disk in self.op.disks:
6633 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6638 if has_adopt and has_no_adopt:
6639 raise errors.OpPrereqError("Either all disks are adopted or none is",
6642 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6643 raise errors.OpPrereqError("Disk adoption is not supported for the"
6644 " '%s' disk template" %
6645 self.op.disk_template,
6647 if self.op.iallocator is not None:
6648 raise errors.OpPrereqError("Disk adoption not allowed with an"
6649 " iallocator script", errors.ECODE_INVAL)
6650 if self.op.mode == constants.INSTANCE_IMPORT:
6651 raise errors.OpPrereqError("Disk adoption not allowed for"
6652 " instance import", errors.ECODE_INVAL)
6654 self.adopt_disks = has_adopt
6656 # instance name verification
6657 if self.op.name_check:
6658 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6659 self.op.instance_name = self.hostname1.name
6660 # used in CheckPrereq for ip ping check
6661 self.check_ip = self.hostname1.ip
6663 self.check_ip = None
6665 # file storage checks
6666 if (self.op.file_driver and
6667 not self.op.file_driver in constants.FILE_DRIVER):
6668 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6669 self.op.file_driver, errors.ECODE_INVAL)
6671 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6672 raise errors.OpPrereqError("File storage directory path not absolute",
6675 ### Node/iallocator related checks
6676 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6678 if self.op.pnode is not None:
6679 if self.op.disk_template in constants.DTS_NET_MIRROR:
6680 if self.op.snode is None:
6681 raise errors.OpPrereqError("The networked disk templates need"
6682 " a mirror node", errors.ECODE_INVAL)
6684 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6686 self.op.snode = None
6688 self._cds = _GetClusterDomainSecret()
6690 if self.op.mode == constants.INSTANCE_IMPORT:
6691 # On import force_variant must be True, because if we forced it at
6692 # initial install, our only chance when importing it back is that it
6694 self.op.force_variant = True
6696 if self.op.no_install:
6697 self.LogInfo("No-installation mode has no effect during import")
6699 elif self.op.mode == constants.INSTANCE_CREATE:
6700 if self.op.os_type is None:
6701 raise errors.OpPrereqError("No guest OS specified",
6703 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6704 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6705 " installation" % self.op.os_type,
6707 if self.op.disk_template is None:
6708 raise errors.OpPrereqError("No disk template specified",
6711 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6712 # Check handshake to ensure both clusters have the same domain secret
6713 src_handshake = self.op.source_handshake
6714 if not src_handshake:
6715 raise errors.OpPrereqError("Missing source handshake",
6718 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6721 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6724 # Load and check source CA
6725 self.source_x509_ca_pem = self.op.source_x509_ca
6726 if not self.source_x509_ca_pem:
6727 raise errors.OpPrereqError("Missing source X509 CA",
6731 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6733 except OpenSSL.crypto.Error, err:
6734 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6735 (err, ), errors.ECODE_INVAL)
6737 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6738 if errcode is not None:
6739 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6742 self.source_x509_ca = cert
6744 src_instance_name = self.op.source_instance_name
6745 if not src_instance_name:
6746 raise errors.OpPrereqError("Missing source instance name",
6749 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6750 self.source_instance_name = netutils.GetHostInfo(norm_name).name
6753 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6754 self.op.mode, errors.ECODE_INVAL)
6756 def ExpandNames(self):
6757 """ExpandNames for CreateInstance.
6759 Figure out the right locks for instance creation.
6762 self.needed_locks = {}
6764 instance_name = self.op.instance_name
6765 # this is just a preventive check, but someone might still add this
6766 # instance in the meantime, and creation will fail at lock-add time
6767 if instance_name in self.cfg.GetInstanceList():
6768 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6769 instance_name, errors.ECODE_EXISTS)
6771 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6773 if self.op.iallocator:
6774 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6776 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6777 nodelist = [self.op.pnode]
6778 if self.op.snode is not None:
6779 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6780 nodelist.append(self.op.snode)
6781 self.needed_locks[locking.LEVEL_NODE] = nodelist
6783 # in case of import lock the source node too
6784 if self.op.mode == constants.INSTANCE_IMPORT:
6785 src_node = self.op.src_node
6786 src_path = self.op.src_path
6788 if src_path is None:
6789 self.op.src_path = src_path = self.op.instance_name
6791 if src_node is None:
6792 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6793 self.op.src_node = None
6794 if os.path.isabs(src_path):
6795 raise errors.OpPrereqError("Importing an instance from an absolute"
6796 " path requires a source node option.",
6799 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6800 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6801 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6802 if not os.path.isabs(src_path):
6803 self.op.src_path = src_path = \
6804 utils.PathJoin(constants.EXPORT_DIR, src_path)
6806 def _RunAllocator(self):
6807 """Run the allocator based on input opcode.
6810 nics = [n.ToDict() for n in self.nics]
6811 ial = IAllocator(self.cfg, self.rpc,
6812 mode=constants.IALLOCATOR_MODE_ALLOC,
6813 name=self.op.instance_name,
6814 disk_template=self.op.disk_template,
6817 vcpus=self.be_full[constants.BE_VCPUS],
6818 mem_size=self.be_full[constants.BE_MEMORY],
6821 hypervisor=self.op.hypervisor,
6824 ial.Run(self.op.iallocator)
6827 raise errors.OpPrereqError("Can't compute nodes using"
6828 " iallocator '%s': %s" %
6829 (self.op.iallocator, ial.info),
6831 if len(ial.result) != ial.required_nodes:
6832 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6833 " of nodes (%s), required %s" %
6834 (self.op.iallocator, len(ial.result),
6835 ial.required_nodes), errors.ECODE_FAULT)
6836 self.op.pnode = ial.result[0]
6837 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6838 self.op.instance_name, self.op.iallocator,
6839 utils.CommaJoin(ial.result))
6840 if ial.required_nodes == 2:
6841 self.op.snode = ial.result[1]
6843 def BuildHooksEnv(self):
6846 This runs on master, primary and secondary nodes of the instance.
6850 "ADD_MODE": self.op.mode,
6852 if self.op.mode == constants.INSTANCE_IMPORT:
6853 env["SRC_NODE"] = self.op.src_node
6854 env["SRC_PATH"] = self.op.src_path
6855 env["SRC_IMAGES"] = self.src_images
6857 env.update(_BuildInstanceHookEnv(
6858 name=self.op.instance_name,
6859 primary_node=self.op.pnode,
6860 secondary_nodes=self.secondaries,
6861 status=self.op.start,
6862 os_type=self.op.os_type,
6863 memory=self.be_full[constants.BE_MEMORY],
6864 vcpus=self.be_full[constants.BE_VCPUS],
6865 nics=_NICListToTuple(self, self.nics),
6866 disk_template=self.op.disk_template,
6867 disks=[(d["size"], d["mode"]) for d in self.disks],
6870 hypervisor_name=self.op.hypervisor,
6873 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6877 def _ReadExportInfo(self):
6878 """Reads the export information from disk.
6880 It will override the opcode source node and path with the actual
6881 information, if these two were not specified before.
6883 @return: the export information
6886 assert self.op.mode == constants.INSTANCE_IMPORT
6888 src_node = self.op.src_node
6889 src_path = self.op.src_path
6891 if src_node is None:
6892 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6893 exp_list = self.rpc.call_export_list(locked_nodes)
6895 for node in exp_list:
6896 if exp_list[node].fail_msg:
6898 if src_path in exp_list[node].payload:
6900 self.op.src_node = src_node = node
6901 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6905 raise errors.OpPrereqError("No export found for relative path %s" %
6906 src_path, errors.ECODE_INVAL)
6908 _CheckNodeOnline(self, src_node)
6909 result = self.rpc.call_export_info(src_node, src_path)
6910 result.Raise("No export or invalid export found in dir %s" % src_path)
6912 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6913 if not export_info.has_section(constants.INISECT_EXP):
6914 raise errors.ProgrammerError("Corrupted export config",
6915 errors.ECODE_ENVIRON)
6917 ei_version = export_info.get(constants.INISECT_EXP, "version")
6918 if (int(ei_version) != constants.EXPORT_VERSION):
6919 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6920 (ei_version, constants.EXPORT_VERSION),
6921 errors.ECODE_ENVIRON)
6924 def _ReadExportParams(self, einfo):
6925 """Use export parameters as defaults.
6927 In case the opcode doesn't specify (as in override) some instance
6928 parameters, then try to use them from the export information, if the export declares them.
6932 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6934 if self.op.disk_template is None:
6935 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6936 self.op.disk_template = einfo.get(constants.INISECT_INS,
6939 raise errors.OpPrereqError("No disk template specified and the export"
6940 " is missing the disk_template information",
6943 if not self.op.disks:
6944 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6946 # TODO: import the disk iv_name too
6947 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6948 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6949 disks.append({"size": disk_sz})
6950 self.op.disks = disks
6952 raise errors.OpPrereqError("No disk info specified and the export"
6953 " is missing the disk information",
6956 if (not self.op.nics and
6957 einfo.has_option(constants.INISECT_INS, "nic_count")):
6959 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6961 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6962 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6967 if (self.op.hypervisor is None and
6968 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6969 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6970 if einfo.has_section(constants.INISECT_HYP):
6971 # use the export parameters but do not override the ones
6972 # specified by the user
6973 for name, value in einfo.items(constants.INISECT_HYP):
6974 if name not in self.op.hvparams:
6975 self.op.hvparams[name] = value
6977 if einfo.has_section(constants.INISECT_BEP):
6978 # use the parameters, without overriding
6979 for name, value in einfo.items(constants.INISECT_BEP):
6980 if name not in self.op.beparams:
6981 self.op.beparams[name] = value
6983 # try to read the parameters old style, from the main section
6984 for name in constants.BES_PARAMETERS:
6985 if (name not in self.op.beparams and
6986 einfo.has_option(constants.INISECT_INS, name)):
6987 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6989 if einfo.has_section(constants.INISECT_OSP):
6990 # use the parameters, without overriding
6991 for name, value in einfo.items(constants.INISECT_OSP):
6992 if name not in self.op.osparams:
6993 self.op.osparams[name] = value
6995 def _RevertToDefaults(self, cluster):
6996 """Revert the instance parameters to the default values.
7000 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7001 for name in self.op.hvparams.keys():
7002 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7003 del self.op.hvparams[name]
7005 be_defs = cluster.SimpleFillBE({})
7006 for name in self.op.beparams.keys():
7007 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7008 del self.op.beparams[name]
7010 nic_defs = cluster.SimpleFillNIC({})
7011 for nic in self.op.nics:
7012 for name in constants.NICS_PARAMETERS:
7013 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7016 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7017 for name in self.op.osparams.keys():
7018 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7019 del self.op.osparams[name]
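# Editorial note: _RevertToDefaults only runs when identify_defaults is set
# (see CheckPrereq below). Any requested hv/be/nic/os parameter whose value
# already equals the cluster default is dropped from the opcode, so the new
# instance keeps tracking the cluster-wide default instead of pinning the
# current value at creation time.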
7021 def CheckPrereq(self):
7022 """Check prerequisites.
7025 if self.op.mode == constants.INSTANCE_IMPORT:
7026 export_info = self._ReadExportInfo()
7027 self._ReadExportParams(export_info)
7029 _CheckDiskTemplate(self.op.disk_template)
7031 if (not self.cfg.GetVGName() and
7032 self.op.disk_template not in constants.DTS_NOT_LVM):
7033 raise errors.OpPrereqError("Cluster does not support lvm-based"
7034 " instances", errors.ECODE_STATE)
7036 if self.op.hypervisor is None:
7037 self.op.hypervisor = self.cfg.GetHypervisorType()
7039 cluster = self.cfg.GetClusterInfo()
7040 enabled_hvs = cluster.enabled_hypervisors
7041 if self.op.hypervisor not in enabled_hvs:
7042 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7043 " cluster (%s)" % (self.op.hypervisor,
7044 ",".join(enabled_hvs)),
7047 # check hypervisor parameter syntax (locally)
7048 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7049 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7051 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7052 hv_type.CheckParameterSyntax(filled_hvp)
7053 self.hv_full = filled_hvp
7054 # check that we don't specify global parameters on an instance
7055 _CheckGlobalHvParams(self.op.hvparams)
7057 # fill and remember the beparams dict
7058 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7059 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7061 # build os parameters
7062 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7064 # now that hvp/bep are in final format, let's reset to defaults,
7066 if self.op.identify_defaults:
7067 self._RevertToDefaults(cluster)
7071 for idx, nic in enumerate(self.op.nics):
7072 nic_mode_req = nic.get("mode", None)
7073 nic_mode = nic_mode_req
7074 if nic_mode is None:
7075 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7077 # in routed mode, for the first nic, the default ip is 'auto'
7078 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7079 default_ip_mode = constants.VALUE_AUTO
7081 default_ip_mode = constants.VALUE_NONE
7083 # ip validity checks
7084 ip = nic.get("ip", default_ip_mode)
7085 if ip is None or ip.lower() == constants.VALUE_NONE:
7087 elif ip.lower() == constants.VALUE_AUTO:
7088 if not self.op.name_check:
7089 raise errors.OpPrereqError("IP address set to auto but name checks"
7090 " have been skipped. Aborting.",
7092 nic_ip = self.hostname1.ip
7094 if not netutils.IsValidIP4(ip):
7095 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7096 " like a valid IP" % ip,
7100 # TODO: check the ip address for uniqueness
7101 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7102 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7105 # MAC address verification
7106 mac = nic.get("mac", constants.VALUE_AUTO)
7107 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7108 mac = utils.NormalizeAndValidateMac(mac)
7111 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7112 except errors.ReservationError:
7113 raise errors.OpPrereqError("MAC address %s already in use"
7114 " in cluster" % mac,
7115 errors.ECODE_NOTUNIQUE)
7117 # bridge verification
7118 bridge = nic.get("bridge", None)
7119 link = nic.get("link", None)
7121 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7122 " at the same time", errors.ECODE_INVAL)
7123 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7124 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7131 nicparams[constants.NIC_MODE] = nic_mode_req
7133 nicparams[constants.NIC_LINK] = link
7135 check_params = cluster.SimpleFillNIC(nicparams)
7136 objects.NIC.CheckParameterSyntax(check_params)
7137 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7139 # disk checks/pre-build
7141 for disk in self.op.disks:
7142 mode = disk.get("mode", constants.DISK_RDWR)
7143 if mode not in constants.DISK_ACCESS_SET:
7144 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7145 mode, errors.ECODE_INVAL)
7146 size = disk.get("size", None)
7148 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7151 except (TypeError, ValueError):
7152 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7154 new_disk = {"size": size, "mode": mode}
7156 new_disk["adopt"] = disk["adopt"]
7157 self.disks.append(new_disk)
7159 if self.op.mode == constants.INSTANCE_IMPORT:
7161 # Check that the new instance doesn't have fewer disks than the export
7162 instance_disks = len(self.disks)
7163 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7164 if instance_disks < export_disks:
7165 raise errors.OpPrereqError("Not enough disks to import."
7166 " (instance: %d, export: %d)" %
7167 (instance_disks, export_disks),
7171 for idx in range(export_disks):
7172 option = 'disk%d_dump' % idx
7173 if export_info.has_option(constants.INISECT_INS, option):
7174 # FIXME: are the old os-es, disk sizes, etc. useful?
7175 export_name = export_info.get(constants.INISECT_INS, option)
7176 image = utils.PathJoin(self.op.src_path, export_name)
7177 disk_images.append(image)
7179 disk_images.append(False)
7181 self.src_images = disk_images
7183 old_name = export_info.get(constants.INISECT_INS, 'name')
7185 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7186 except (TypeError, ValueError), err:
7187 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7188 " an integer: %s" % str(err),
7190 if self.op.instance_name == old_name:
7191 for idx, nic in enumerate(self.nics):
7192 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7193 nic_mac_ini = 'nic%d_mac' % idx
7194 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7196 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7198 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7199 if self.op.ip_check:
7200 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7201 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7202 (self.check_ip, self.op.instance_name),
7203 errors.ECODE_NOTUNIQUE)
7205 #### mac address generation
7206 # By generating here the mac address both the allocator and the hooks get
7207 # the real final mac address rather than the 'auto' or 'generate' value.
7208 # There is a race condition between the generation and the instance object
7209 # creation, which means that we know the mac is valid now, but we're not
7210 # sure it will be when we actually add the instance. If things go bad
7211 # adding the instance will abort because of a duplicate mac, and the
7212 # creation job will fail.
7213 for nic in self.nics:
7214 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7215 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7219 if self.op.iallocator is not None:
7220 self._RunAllocator()
7222 #### node related checks
7224 # check primary node
7225 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7226 assert self.pnode is not None, \
7227 "Cannot retrieve locked node %s" % self.op.pnode
7229 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7230 pnode.name, errors.ECODE_STATE)
7232 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7233 pnode.name, errors.ECODE_STATE)
7235 self.secondaries = []
7237 # mirror node verification
7238 if self.op.disk_template in constants.DTS_NET_MIRROR:
7239 if self.op.snode == pnode.name:
7240 raise errors.OpPrereqError("The secondary node cannot be the"
7241 " primary node.", errors.ECODE_INVAL)
7242 _CheckNodeOnline(self, self.op.snode)
7243 _CheckNodeNotDrained(self, self.op.snode)
7244 self.secondaries.append(self.op.snode)
7246 nodenames = [pnode.name] + self.secondaries
7248 req_size = _ComputeDiskSize(self.op.disk_template,
7251 # Check lv size requirements, if not adopting
7252 if req_size is not None and not self.adopt_disks:
7253 _CheckNodesFreeDisk(self, nodenames, req_size)
7255 if self.adopt_disks: # instead, we must check the adoption data
7256 all_lvs = set([i["adopt"] for i in self.disks])
7257 if len(all_lvs) != len(self.disks):
7258 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7260 for lv_name in all_lvs:
7262 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7263 except errors.ReservationError:
7264 raise errors.OpPrereqError("LV named %s used by another instance" %
7265 lv_name, errors.ECODE_NOTUNIQUE)
7267 node_lvs = self.rpc.call_lv_list([pnode.name],
7268 self.cfg.GetVGName())[pnode.name]
7269 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7270 node_lvs = node_lvs.payload
7271 delta = all_lvs.difference(node_lvs.keys())
7273 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7274 utils.CommaJoin(delta),
7276 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7278 raise errors.OpPrereqError("Online logical volumes found, cannot"
7279 " adopt: %s" % utils.CommaJoin(online_lvs),
7281 # update the size of each disk based on what is found
7282 for dsk in self.disks:
7283 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7285 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7287 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7288 # check OS parameters (remotely)
7289 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7291 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7293 # memory check on primary node
7295 _CheckNodeFreeMemory(self, self.pnode.name,
7296 "creating instance %s" % self.op.instance_name,
7297 self.be_full[constants.BE_MEMORY],
7300 self.dry_run_result = list(nodenames)
7302 def Exec(self, feedback_fn):
7303 """Create and add the instance to the cluster.
7306 instance = self.op.instance_name
7307 pnode_name = self.pnode.name
7309 ht_kind = self.op.hypervisor
7310 if ht_kind in constants.HTS_REQ_PORT:
7311 network_port = self.cfg.AllocatePort()
7315 if constants.ENABLE_FILE_STORAGE:
7316 # this is needed because os.path.join does not accept None arguments
7317 if self.op.file_storage_dir is None:
7318 string_file_storage_dir = ""
7320 string_file_storage_dir = self.op.file_storage_dir
7322 # build the full file storage dir path
7323 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7324 string_file_storage_dir, instance)
7326 file_storage_dir = ""
7328 disks = _GenerateDiskTemplate(self,
7329 self.op.disk_template,
7330 instance, pnode_name,
7334 self.op.file_driver,
7337 iobj = objects.Instance(name=instance, os=self.op.os_type,
7338 primary_node=pnode_name,
7339 nics=self.nics, disks=disks,
7340 disk_template=self.op.disk_template,
7342 network_port=network_port,
7343 beparams=self.op.beparams,
7344 hvparams=self.op.hvparams,
7345 hypervisor=self.op.hypervisor,
7346 osparams=self.op.osparams,
7349 if self.adopt_disks:
7350 # rename LVs to the newly-generated names; we need to construct
7351 # 'fake' LV disks with the old data, plus the new unique_id
7352 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7354 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7355 rename_to.append(t_dsk.logical_id)
7356 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7357 self.cfg.SetDiskID(t_dsk, pnode_name)
7358 result = self.rpc.call_blockdev_rename(pnode_name,
7359 zip(tmp_disks, rename_to))
7360 result.Raise("Failed to rename adopted LVs")
7362 feedback_fn("* creating instance disks...")
7364 _CreateDisks(self, iobj)
7365 except errors.OpExecError:
7366 self.LogWarning("Device creation failed, reverting...")
7368 _RemoveDisks(self, iobj)
7370 self.cfg.ReleaseDRBDMinors(instance)
7373 feedback_fn("adding instance %s to cluster config" % instance)
7375 self.cfg.AddInstance(iobj, self.proc.GetECId())
7377 # Declare that we don't want to remove the instance lock anymore, as we've
7378 # added the instance to the config
7379 del self.remove_locks[locking.LEVEL_INSTANCE]
7380 # Unlock all the nodes
7381 if self.op.mode == constants.INSTANCE_IMPORT:
7382 nodes_keep = [self.op.src_node]
7383 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7384 if node != self.op.src_node]
7385 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7386 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7388 self.context.glm.release(locking.LEVEL_NODE)
7389 del self.acquired_locks[locking.LEVEL_NODE]
7391 if self.op.wait_for_sync:
7392 disk_abort = not _WaitForSync(self, iobj)
7393 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7394 # make sure the disks are not degraded (still sync-ing is ok)
7396 feedback_fn("* checking mirrors status")
7397 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7402 _RemoveDisks(self, iobj)
7403 self.cfg.RemoveInstance(iobj.name)
7404 # Make sure the instance lock gets removed
7405 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7406 raise errors.OpExecError("There are some degraded disks for"
7409 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7410 if self.op.mode == constants.INSTANCE_CREATE:
7411 if not self.op.no_install:
7412 feedback_fn("* running the instance OS create scripts...")
7413 # FIXME: pass debug option from opcode to backend
7414 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7415 self.op.debug_level)
7416 result.Raise("Could not add os for instance %s"
7417 " on node %s" % (instance, pnode_name))
7419 elif self.op.mode == constants.INSTANCE_IMPORT:
7420 feedback_fn("* running the instance OS import scripts...")
7424 for idx, image in enumerate(self.src_images):
7428 # FIXME: pass debug option from opcode to backend
7429 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7430 constants.IEIO_FILE, (image, ),
7431 constants.IEIO_SCRIPT,
7432 (iobj.disks[idx], idx),
7434 transfers.append(dt)
7437 masterd.instance.TransferInstanceData(self, feedback_fn,
7438 self.op.src_node, pnode_name,
7439 self.pnode.secondary_ip,
7441 if not compat.all(import_result):
7442 self.LogWarning("Some disks for instance %s on node %s were not"
7443 " imported successfully" % (instance, pnode_name))
7445 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7446 feedback_fn("* preparing remote import...")
7447 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7448 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7450 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7451 self.source_x509_ca,
7452 self._cds, timeouts)
7453 if not compat.all(disk_results):
7454 # TODO: Should the instance still be started, even if some disks
7455 # failed to import (valid for local imports, too)?
7456 self.LogWarning("Some disks for instance %s on node %s were not"
7457 " imported successfully" % (instance, pnode_name))
7459 # Run rename script on newly imported instance
7460 assert iobj.name == instance
7461 feedback_fn("Running rename script for %s" % instance)
7462 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7463 self.source_instance_name,
7464 self.op.debug_level)
7466 self.LogWarning("Failed to run rename script for %s on node"
7467 " %s: %s" % (instance, pnode_name, result.fail_msg))
7470 # also checked in the prereq part
7471 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7475 iobj.admin_up = True
7476 self.cfg.Update(iobj, feedback_fn)
7477 logging.info("Starting instance %s on node %s", instance, pnode_name)
7478 feedback_fn("* starting instance...")
7479 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7480 result.Raise("Could not start instance")
7482 return list(iobj.all_nodes)
7485 class LUConnectConsole(NoHooksLU):
7486 """Connect to an instance's console.
7488 This is somewhat special in that it returns the command line that
7489 you need to run on the master node in order to connect to the console.
7498 def ExpandNames(self):
7499 self._ExpandAndLockInstance()
7501 def CheckPrereq(self):
7502 """Check prerequisites.
7504 This checks that the instance is in the cluster.
7507 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7508 assert self.instance is not None, \
7509 "Cannot retrieve locked instance %s" % self.op.instance_name
7510 _CheckNodeOnline(self, self.instance.primary_node)
7512 def Exec(self, feedback_fn):
7513 """Connect to the console of an instance
7516 instance = self.instance
7517 node = instance.primary_node
7519 node_insts = self.rpc.call_instance_list([node],
7520 [instance.hypervisor])[node]
7521 node_insts.Raise("Can't get node information from %s" % node)
7523 if instance.name not in node_insts.payload:
7524 raise errors.OpExecError("Instance %s is not running." % instance.name)
7526 logging.debug("Connecting to console of %s on %s", instance.name, node)
7528 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7529 cluster = self.cfg.GetClusterInfo()
7530 # beparams and hvparams are passed separately, to avoid editing the
7531 # instance and then saving the defaults in the instance itself.
7532 hvparams = cluster.FillHV(instance)
7533 beparams = cluster.FillBE(instance)
7534 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7537 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7540 class LUReplaceDisks(LogicalUnit):
7541 """Replace the disks of an instance.
7544 HPATH = "mirrors-replace"
7545 HTYPE = constants.HTYPE_INSTANCE
7548 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7549 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7550 ("remote_node", None, _TMaybeString),
7551 ("iallocator", None, _TMaybeString),
7552 ("early_release", False, _TBool),
7556 def CheckArguments(self):
7557 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7560 def ExpandNames(self):
7561 self._ExpandAndLockInstance()
7563 if self.op.iallocator is not None:
7564 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7566 elif self.op.remote_node is not None:
7567 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7568 self.op.remote_node = remote_node
7570 # Warning: do not remove the locking of the new secondary here
7571 # unless DRBD8.AddChildren is changed to work in parallel;
7572 # currently it doesn't since parallel invocations of
7573 # FindUnusedMinor will conflict
7574 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7575 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7578 self.needed_locks[locking.LEVEL_NODE] = []
7579 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7581 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7582 self.op.iallocator, self.op.remote_node,
7583 self.op.disks, False, self.op.early_release)
7585 self.tasklets = [self.replacer]
7587 def DeclareLocks(self, level):
7588 # If we're not already locking all nodes in the set we have to declare the
7589 # instance's primary/secondary nodes.
7590 if (level == locking.LEVEL_NODE and
7591 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7592 self._LockInstancesNodes()
7594 def BuildHooksEnv(self):
7597 This runs on the master, the primary and all the secondaries.
7600 instance = self.replacer.instance
7602 "MODE": self.op.mode,
7603 "NEW_SECONDARY": self.op.remote_node,
7604 "OLD_SECONDARY": instance.secondary_nodes[0],
7606 env.update(_BuildInstanceHookEnvByObject(self, instance))
7608 self.cfg.GetMasterNode(),
7609 instance.primary_node,
7611 if self.op.remote_node is not None:
7612 nl.append(self.op.remote_node)
7616 class TLReplaceDisks(Tasklet):
7617 """Replaces disks for an instance.
7619 Note: Locking is not within the scope of this class.
7622 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7623 disks, delay_iallocator, early_release):
7624 """Initializes this class.
7627 Tasklet.__init__(self, lu)
7630 self.instance_name = instance_name
7632 self.iallocator_name = iallocator_name
7633 self.remote_node = remote_node
7635 self.delay_iallocator = delay_iallocator
7636 self.early_release = early_release
7639 self.instance = None
7640 self.new_node = None
7641 self.target_node = None
7642 self.other_node = None
7643 self.remote_node_info = None
7644 self.node_secondary_ip = None
7647 def CheckArguments(mode, remote_node, iallocator):
7648 """Helper function for users of this class.
7651 # check for valid parameter combination
7652 if mode == constants.REPLACE_DISK_CHG:
7653 if remote_node is None and iallocator is None:
7654 raise errors.OpPrereqError("When changing the secondary either an"
7655 " iallocator script must be used or the"
7656 " new node given", errors.ECODE_INVAL)
7658 if remote_node is not None and iallocator is not None:
7659 raise errors.OpPrereqError("Give either the iallocator or the new"
7660 " secondary, not both", errors.ECODE_INVAL)
7662 elif remote_node is not None or iallocator is not None:
7663 # Not replacing the secondary
7664 raise errors.OpPrereqError("The iallocator and new node options can"
7665 " only be used when changing the"
7666 " secondary node", errors.ECODE_INVAL)
7669 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7670 """Compute a new secondary node using an IAllocator.
7673 ial = IAllocator(lu.cfg, lu.rpc,
7674 mode=constants.IALLOCATOR_MODE_RELOC,
7676 relocate_from=relocate_from)
7678 ial.Run(iallocator_name)
7681 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7682 " %s" % (iallocator_name, ial.info),
7685 if len(ial.result) != ial.required_nodes:
7686 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7687 " of nodes (%s), required %s" %
7689 len(ial.result), ial.required_nodes),
7692 remote_node_name = ial.result[0]
7694 lu.LogInfo("Selected new secondary for instance '%s': %s",
7695 instance_name, remote_node_name)
7697 return remote_node_name
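# Note: the IAllocator request above is a relocation request for the given
# instance, with the current secondary passed as relocate_from; the allocator
# must return exactly ial.required_nodes names, and the first one is used as
# the new secondary.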
7699 def _FindFaultyDisks(self, node_name):
7700 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7703 def CheckPrereq(self):
7704 """Check prerequisites.
7706 This checks that the instance is in the cluster.
7709 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7710 assert instance is not None, \
7711 "Cannot retrieve locked instance %s" % self.instance_name
7713 if instance.disk_template != constants.DT_DRBD8:
7714 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7715 " instances", errors.ECODE_INVAL)
7717 if len(instance.secondary_nodes) != 1:
7718 raise errors.OpPrereqError("The instance has a strange layout,"
7719 " expected one secondary but found %d" %
7720 len(instance.secondary_nodes),
7723 if not self.delay_iallocator:
7724 self._CheckPrereq2()
7726 def _CheckPrereq2(self):
7727 """Check prerequisites, second part.
7729 This function should always be part of CheckPrereq. It was separated and is
7730 now called from Exec because during node evacuation iallocator was only
7731 called with an unmodified cluster model, not taking planned changes into
7735 instance = self.instance
7736 secondary_node = instance.secondary_nodes[0]
7738 if self.iallocator_name is None:
7739 remote_node = self.remote_node
7741 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7742 instance.name, instance.secondary_nodes)
7744 if remote_node is not None:
7745 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7746 assert self.remote_node_info is not None, \
7747 "Cannot retrieve locked node %s" % remote_node
7749 self.remote_node_info = None
7751 if remote_node == self.instance.primary_node:
7752 raise errors.OpPrereqError("The specified node is the primary node of"
7753 " the instance.", errors.ECODE_INVAL)
7755 if remote_node == secondary_node:
7756 raise errors.OpPrereqError("The specified node is already the"
7757 " secondary node of the instance.",
7760 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7761 constants.REPLACE_DISK_CHG):
7762 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7765 if self.mode == constants.REPLACE_DISK_AUTO:
7766 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7767 faulty_secondary = self._FindFaultyDisks(secondary_node)
7769 if faulty_primary and faulty_secondary:
7770 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7771 " one node and can not be repaired"
7772 " automatically" % self.instance_name,
7776 self.disks = faulty_primary
7777 self.target_node = instance.primary_node
7778 self.other_node = secondary_node
7779 check_nodes = [self.target_node, self.other_node]
7780 elif faulty_secondary:
7781 self.disks = faulty_secondary
7782 self.target_node = secondary_node
7783 self.other_node = instance.primary_node
7784 check_nodes = [self.target_node, self.other_node]
7790 # Non-automatic modes
7791 if self.mode == constants.REPLACE_DISK_PRI:
7792 self.target_node = instance.primary_node
7793 self.other_node = secondary_node
7794 check_nodes = [self.target_node, self.other_node]
7796 elif self.mode == constants.REPLACE_DISK_SEC:
7797 self.target_node = secondary_node
7798 self.other_node = instance.primary_node
7799 check_nodes = [self.target_node, self.other_node]
7801 elif self.mode == constants.REPLACE_DISK_CHG:
7802 self.new_node = remote_node
7803 self.other_node = instance.primary_node
7804 self.target_node = secondary_node
7805 check_nodes = [self.new_node, self.other_node]
7807 _CheckNodeNotDrained(self.lu, remote_node)
7809 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7810 assert old_node_info is not None
7811 if old_node_info.offline and not self.early_release:
7812 # doesn't make sense to delay the release
7813 self.early_release = True
7814 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7815 " early-release mode", secondary_node)
7818 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7821 # If not specified all disks should be replaced
7823 self.disks = range(len(self.instance.disks))
7825 for node in check_nodes:
7826 _CheckNodeOnline(self.lu, node)
7828 # Check whether disks are valid
7829 for disk_idx in self.disks:
7830 instance.FindDisk(disk_idx)
7832 # Get secondary node IP addresses
7835 for node_name in [self.target_node, self.other_node, self.new_node]:
7836 if node_name is not None:
7837 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7839 self.node_secondary_ip = node_2nd_ip
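# The secondary IPs collected here are needed later by the DRBD RPCs
# (call_drbd_disconnect_net / call_drbd_attach_net) when the replication
# network is reconfigured towards the new secondary.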
7841 def Exec(self, feedback_fn):
7842 """Execute disk replacement.
7844 This dispatches the disk replacement to the appropriate handler.
7847 if self.delay_iallocator:
7848 self._CheckPrereq2()
7851 feedback_fn("No disks need replacement")
7854 feedback_fn("Replacing disk(s) %s for %s" %
7855 (utils.CommaJoin(self.disks), self.instance.name))
7857 activate_disks = (not self.instance.admin_up)
7859 # Activate the instance disks if we're replacing them on a down instance
7861 _StartInstanceDisks(self.lu, self.instance, True)
7864 # Should we replace the secondary node?
7865 if self.new_node is not None:
7866 fn = self._ExecDrbd8Secondary
7868 fn = self._ExecDrbd8DiskOnly
7870 return fn(feedback_fn)
7873 # Deactivate the instance disks if we're replacing them on a
7876 _SafeShutdownInstanceDisks(self.lu, self.instance)
7878 def _CheckVolumeGroup(self, nodes):
7879 self.lu.LogInfo("Checking volume groups")
7881 vgname = self.cfg.GetVGName()
7883 # Make sure volume group exists on all involved nodes
7884 results = self.rpc.call_vg_list(nodes)
7886 raise errors.OpExecError("Can't list volume groups on the nodes")
7890 res.Raise("Error checking node %s" % node)
7891 if vgname not in res.payload:
7892 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7895 def _CheckDisksExistence(self, nodes):
7896 # Check disk existence
7897 for idx, dev in enumerate(self.instance.disks):
7898 if idx not in self.disks:
7902 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7903 self.cfg.SetDiskID(dev, node)
7905 result = self.rpc.call_blockdev_find(node, dev)
7907 msg = result.fail_msg
7908 if msg or not result.payload:
7910 msg = "disk not found"
7911 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7914 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7915 for idx, dev in enumerate(self.instance.disks):
7916 if idx not in self.disks:
7919 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7922 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7924 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7925 " replace disks for instance %s" %
7926 (node_name, self.instance.name))
7928 def _CreateNewStorage(self, node_name):
7929 vgname = self.cfg.GetVGName()
7932 for idx, dev in enumerate(self.instance.disks):
7933 if idx not in self.disks:
7936 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7938 self.cfg.SetDiskID(dev, node_name)
7940 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7941 names = _GenerateUniqueNames(self.lu, lv_names)
7943 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7944 logical_id=(vgname, names[0]))
7945 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7946 logical_id=(vgname, names[1]))
7948 new_lvs = [lv_data, lv_meta]
7949 old_lvs = dev.children
7950 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7952 # we pass force_create=True to force the LVM creation
7953 for new_lv in new_lvs:
7954 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7955 _GetInstanceInfoText(self.instance), False)
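# iv_names maps each instance-visible device name (e.g. "disk/0") to a tuple
# of (drbd device, old LVs, new LVs); it is consumed later when detaching,
# renaming and cleaning up the logical volumes.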
7959 def _CheckDevices(self, node_name, iv_names):
7960 for name, (dev, _, _) in iv_names.iteritems():
7961 self.cfg.SetDiskID(dev, node_name)
7963 result = self.rpc.call_blockdev_find(node_name, dev)
7965 msg = result.fail_msg
7966 if msg or not result.payload:
7968 msg = "disk not found"
7969 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7972 if result.payload.is_degraded:
7973 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7975 def _RemoveOldStorage(self, node_name, iv_names):
7976 for name, (_, old_lvs, _) in iv_names.iteritems():
7977 self.lu.LogInfo("Remove logical volumes for %s" % name)
7980 self.cfg.SetDiskID(lv, node_name)
7982 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7984 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7985 hint="remove unused LVs manually")
7987 def _ReleaseNodeLock(self, node_name):
7988 """Releases the lock for a given node."""
7989 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7991 def _ExecDrbd8DiskOnly(self, feedback_fn):
7992 """Replace a disk on the primary or secondary for DRBD 8.
7994 The algorithm for replace is quite complicated:
7996 1. for each disk to be replaced:
7998 1. create new LVs on the target node with unique names
7999 1. detach old LVs from the drbd device
8000 1. rename old LVs to name_replaced.<time_t>
8001 1. rename new LVs to old LVs
8002 1. attach the new LVs (with the old names now) to the drbd device
8004 1. wait for sync across all devices
8006 1. for each modified disk:
8008 1. remove old LVs (which have the name name_replaced.<time_t>)
8010 Failures are not very well handled.
8015 # Step: check device activation
8016 self.lu.LogStep(1, steps_total, "Check device existence")
8017 self._CheckDisksExistence([self.other_node, self.target_node])
8018 self._CheckVolumeGroup([self.target_node, self.other_node])
8020 # Step: check other node consistency
8021 self.lu.LogStep(2, steps_total, "Check peer consistency")
8022 self._CheckDisksConsistency(self.other_node,
8023 self.other_node == self.instance.primary_node,
8026 # Step: create new storage
8027 self.lu.LogStep(3, steps_total, "Allocate new storage")
8028 iv_names = self._CreateNewStorage(self.target_node)
8030 # Step: for each lv, detach+rename*2+attach
8031 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8032 for dev, old_lvs, new_lvs in iv_names.itervalues():
8033 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8035 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8037 result.Raise("Can't detach drbd from local storage on node"
8038 " %s for device %s" % (self.target_node, dev.iv_name))
8040 #cfg.Update(instance)
8042 # ok, we created the new LVs, so now we know we have the needed
8043 # storage; as such, we proceed on the target node to rename
8044 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8045 # using the assumption that logical_id == physical_id (which in
8046 # turn is the unique_id on that node)
8048 # FIXME(iustin): use a better name for the replaced LVs
8049 temp_suffix = int(time.time())
8050 ren_fn = lambda d, suff: (d.physical_id[0],
8051 d.physical_id[1] + "_replaced-%s" % suff)
8053 # Build the rename list based on what LVs exist on the node
8054 rename_old_to_new = []
8055 for to_ren in old_lvs:
8056 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8057 if not result.fail_msg and result.payload:
8059 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8061 self.lu.LogInfo("Renaming the old LVs on the target node")
8062 result = self.rpc.call_blockdev_rename(self.target_node,
8064 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8066 # Now we rename the new LVs to the old LVs
8067 self.lu.LogInfo("Renaming the new LVs on the target node")
8068 rename_new_to_old = [(new, old.physical_id)
8069 for old, new in zip(old_lvs, new_lvs)]
8070 result = self.rpc.call_blockdev_rename(self.target_node,
8072 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8074 for old, new in zip(old_lvs, new_lvs):
8075 new.logical_id = old.logical_id
8076 self.cfg.SetDiskID(new, self.target_node)
8078 for disk in old_lvs:
8079 disk.logical_id = ren_fn(disk, temp_suffix)
8080 self.cfg.SetDiskID(disk, self.target_node)
8082 # Now that the new lvs have the old name, we can add them to the device
8083 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8084 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8086 msg = result.fail_msg
8088 for new_lv in new_lvs:
8089 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8092 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8093 hint=("cleanup manually the unused logical"
8095 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8097 dev.children = new_lvs
8099 self.cfg.Update(self.instance, feedback_fn)
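# Trade-off for the two paths below: with early_release the old LVs are
# removed and the node locks dropped before the resync finishes (faster lock
# turnaround, but nothing left to roll back to); without it the old storage
# is only removed after the devices have synced and been checked.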
8102 if self.early_release:
8103 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8105 self._RemoveOldStorage(self.target_node, iv_names)
8106 # WARNING: we release both node locks here, do not do other RPCs
8107 # than WaitForSync to the primary node
8108 self._ReleaseNodeLock([self.target_node, self.other_node])
8111 # This can fail as the old devices are degraded and _WaitForSync
8112 # does a combined result over all disks, so we don't check its return value
8113 self.lu.LogStep(cstep, steps_total, "Sync devices")
8115 _WaitForSync(self.lu, self.instance)
8117 # Check all devices manually
8118 self._CheckDevices(self.instance.primary_node, iv_names)
8120 # Step: remove old storage
8121 if not self.early_release:
8122 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8124 self._RemoveOldStorage(self.target_node, iv_names)
8126 def _ExecDrbd8Secondary(self, feedback_fn):
8127 """Replace the secondary node for DRBD 8.
8129 The algorithm for replace is quite complicated:
8130 - for all disks of the instance:
8131 - create new LVs on the new node with same names
8132 - shutdown the drbd device on the old secondary
8133 - disconnect the drbd network on the primary
8134 - create the drbd device on the new secondary
8135 - network attach the drbd on the primary, using an artifice:
8136 the drbd code for Attach() will connect to the network if it
8137 finds a device which is connected to the good local disks but
8139 - wait for sync across all devices
8140 - remove all disks from the old secondary
8142 Failures are not very well handled.
8147 # Step: check device activation
8148 self.lu.LogStep(1, steps_total, "Check device existence")
8149 self._CheckDisksExistence([self.instance.primary_node])
8150 self._CheckVolumeGroup([self.instance.primary_node])
8152 # Step: check other node consistency
8153 self.lu.LogStep(2, steps_total, "Check peer consistency")
8154 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8156 # Step: create new storage
8157 self.lu.LogStep(3, steps_total, "Allocate new storage")
8158 for idx, dev in enumerate(self.instance.disks):
8159 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8160 (self.new_node, idx))
8161 # we pass force_create=True to force LVM creation
8162 for new_lv in dev.children:
8163 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8164 _GetInstanceInfoText(self.instance), False)
8166 # Step 4: drbd minors and drbd setup changes
8167 # after this, we must manually remove the drbd minors on both the
8168 # error and the success paths
8169 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8170 minors = self.cfg.AllocateDRBDMinor([self.new_node
8171 for dev in self.instance.disks],
8173 logging.debug("Allocated minors %r", minors)
8176 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8177 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8178 (self.new_node, idx))
8179 # create new devices on new_node; note that we create two IDs:
8180 # one without port, so the drbd will be activated without
8181 # networking information on the new node at this stage, and one
8182 # with network, for the latter activation in step 4
8183 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8184 if self.instance.primary_node == o_node1:
8187 assert self.instance.primary_node == o_node2, "Three-node instance?"
8190 new_alone_id = (self.instance.primary_node, self.new_node, None,
8191 p_minor, new_minor, o_secret)
8192 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8193 p_minor, new_minor, o_secret)
8195 iv_names[idx] = (dev, dev.children, new_net_id)
8196 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8198 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8199 logical_id=new_alone_id,
8200 children=dev.children,
8203 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8204 _GetInstanceInfoText(self.instance), False)
8205 except errors.GenericError:
8206 self.cfg.ReleaseDRBDMinors(self.instance.name)
8209 # We have new devices, shutdown the drbd on the old secondary
8210 for idx, dev in enumerate(self.instance.disks):
8211 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8212 self.cfg.SetDiskID(dev, self.target_node)
8213 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8215 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8216 "node: %s" % (idx, msg),
8217 hint=("Please cleanup this device manually as"
8218 " soon as possible"))
8220 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8221 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8222 self.node_secondary_ip,
8223 self.instance.disks)\
8224 [self.instance.primary_node]
8226 msg = result.fail_msg
8228 # detaches didn't succeed (unlikely)
8229 self.cfg.ReleaseDRBDMinors(self.instance.name)
8230 raise errors.OpExecError("Can't detach the disks from the network on"
8231 " old node: %s" % (msg,))
8233 # if we managed to detach at least one, we update all the disks of
8234 # the instance to point to the new secondary
8235 self.lu.LogInfo("Updating instance configuration")
8236 for dev, _, new_logical_id in iv_names.itervalues():
8237 dev.logical_id = new_logical_id
8238 self.cfg.SetDiskID(dev, self.instance.primary_node)
8240 self.cfg.Update(self.instance, feedback_fn)
8242 # and now perform the drbd attach
8243 self.lu.LogInfo("Attaching primary drbds to new secondary"
8244 " (standalone => connected)")
8245 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8247 self.node_secondary_ip,
8248 self.instance.disks,
8251 for to_node, to_result in result.items():
8252 msg = to_result.fail_msg
8254 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8256 hint=("please do a gnt-instance info to see the"
8257 " status of disks"))
8259 if self.early_release:
8260 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8262 self._RemoveOldStorage(self.target_node, iv_names)
8263 # WARNING: we release all node locks here, do not do other RPCs
8264 # than WaitForSync to the primary node
8265 self._ReleaseNodeLock([self.instance.primary_node,
8270 # This can fail as the old devices are degraded and _WaitForSync
8271 # does a combined result over all disks, so we don't check its return value
8272 self.lu.LogStep(cstep, steps_total, "Sync devices")
8274 _WaitForSync(self.lu, self.instance)
8276 # Check all devices manually
8277 self._CheckDevices(self.instance.primary_node, iv_names)
8279 # Step: remove old storage
8280 if not self.early_release:
8281 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8282 self._RemoveOldStorage(self.target_node, iv_names)
8285 class LURepairNodeStorage(NoHooksLU):
8286 """Repairs the volume group on a node.
8291 ("storage_type", _NoDefault, _CheckStorageType),
8292 ("name", _NoDefault, _TNonEmptyString),
8293 ("ignore_consistency", False, _TBool),
8297 def CheckArguments(self):
8298 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8300 storage_type = self.op.storage_type
8302 if (constants.SO_FIX_CONSISTENCY not in
8303 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8304 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8305 " repaired" % storage_type,
8308 def ExpandNames(self):
8309 self.needed_locks = {
8310 locking.LEVEL_NODE: [self.op.node_name],
8313 def _CheckFaultyDisks(self, instance, node_name):
8314 """Ensure faulty disks abort the opcode or at least warn."""
8316 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8318 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8319 " node '%s'" % (instance.name, node_name),
8321 except errors.OpPrereqError, err:
8322 if self.op.ignore_consistency:
8323 self.proc.LogWarning(str(err.args[0]))
8327 def CheckPrereq(self):
8328 """Check prerequisites.
8331 # Check whether any instance on this node has faulty disks
8332 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8333 if not inst.admin_up:
8335 check_nodes = set(inst.all_nodes)
8336 check_nodes.discard(self.op.node_name)
8337 for inst_node_name in check_nodes:
8338 self._CheckFaultyDisks(inst, inst_node_name)
8340 def Exec(self, feedback_fn):
8341 feedback_fn("Repairing storage unit '%s' on %s ..." %
8342 (self.op.name, self.op.node_name))
8344 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8345 result = self.rpc.call_storage_execute(self.op.node_name,
8346 self.op.storage_type, st_args,
8348 constants.SO_FIX_CONSISTENCY)
8349 result.Raise("Failed to repair storage unit '%s' on %s" %
8350 (self.op.name, self.op.node_name))
8353 class LUNodeEvacuationStrategy(NoHooksLU):
8354 """Computes the node evacuation strategy.
8358 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8359 ("remote_node", None, _TMaybeString),
8360 ("iallocator", None, _TMaybeString),
8364 def CheckArguments(self):
8365 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8367 def ExpandNames(self):
8368 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8369 self.needed_locks = locks = {}
8370 if self.op.remote_node is None:
8371 locks[locking.LEVEL_NODE] = locking.ALL_SET
8373 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8374 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
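# Exec (below) returns the evacuation plan: with an explicit remote_node it
# is a list of [instance_name, remote_node] pairs built from the secondary
# instances of the given nodes; with an iallocator it is the allocator's
# multi-evacuation (MEVAC) result.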
8376 def Exec(self, feedback_fn):
8377 if self.op.remote_node is not None:
8379 for node in self.op.nodes:
8380 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8383 if i.primary_node == self.op.remote_node:
8384 raise errors.OpPrereqError("Node %s is the primary node of"
8385 " instance %s, cannot use it as"
8387 (self.op.remote_node, i.name),
8389 result.append([i.name, self.op.remote_node])
8391 ial = IAllocator(self.cfg, self.rpc,
8392 mode=constants.IALLOCATOR_MODE_MEVAC,
8393 evac_nodes=self.op.nodes)
8394 ial.Run(self.op.iallocator, validate=True)
8396 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8402 class LUGrowDisk(LogicalUnit):
8403 """Grow a disk of an instance.
8407 HTYPE = constants.HTYPE_INSTANCE
8410 ("disk", _NoDefault, _TInt),
8411 ("amount", _NoDefault, _TInt),
8412 ("wait_for_sync", True, _TBool),
8416 def ExpandNames(self):
8417 self._ExpandAndLockInstance()
8418 self.needed_locks[locking.LEVEL_NODE] = []
8419 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8421 def DeclareLocks(self, level):
8422 if level == locking.LEVEL_NODE:
8423 self._LockInstancesNodes()
8425 def BuildHooksEnv(self):
8428 This runs on the master, the primary and all the secondaries.
8432 "DISK": self.op.disk,
8433 "AMOUNT": self.op.amount,
8435 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8436 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8439 def CheckPrereq(self):
8440 """Check prerequisites.
8442 This checks that the instance is in the cluster.
8445 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8446 assert instance is not None, \
8447 "Cannot retrieve locked instance %s" % self.op.instance_name
8448 nodenames = list(instance.all_nodes)
8449 for node in nodenames:
8450 _CheckNodeOnline(self, node)
8452 self.instance = instance
8454 if instance.disk_template not in constants.DTS_GROWABLE:
8455 raise errors.OpPrereqError("Instance's disk layout does not support"
8456 " growing.", errors.ECODE_INVAL)
8458 self.disk = instance.FindDisk(self.op.disk)
8460 if instance.disk_template != constants.DT_FILE:
8461 # TODO: check the free disk space for file, when that feature will be
8463 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8465 def Exec(self, feedback_fn):
8466 """Execute disk grow.
8469 instance = self.instance
8472 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8474 raise errors.OpExecError("Cannot activate block device to grow")
8476 for node in instance.all_nodes:
8477 self.cfg.SetDiskID(disk, node)
8478 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8479 result.Raise("Grow request failed to node %s" % node)
8481 # TODO: Rewrite code to work properly
8482 # DRBD goes into sync mode for a short amount of time after executing the
8483 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8484 # calling "resize" in sync mode fails. Sleeping for a short amount of
8485 # time is a work-around.
8488 disk.RecordGrow(self.op.amount)
8489 self.cfg.Update(instance, feedback_fn)
8490 if self.op.wait_for_sync:
8491 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8493 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8494 " status.\nPlease check the instance.")
8495 if not instance.admin_up:
8496 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8497 elif not instance.admin_up:
8498 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8499 " not supposed to be running because no wait for"
8500 " sync mode was requested.")
8503 class LUQueryInstanceData(NoHooksLU):
8504 """Query runtime instance data.
8508 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8509 ("static", False, _TBool),
8513 def ExpandNames(self):
8514 self.needed_locks = {}
8515 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8517 if self.op.instances:
8518 self.wanted_names = []
8519 for name in self.op.instances:
8520 full_name = _ExpandInstanceName(self.cfg, name)
8521 self.wanted_names.append(full_name)
8522 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8524 self.wanted_names = None
8525 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8527 self.needed_locks[locking.LEVEL_NODE] = []
8528 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8530 def DeclareLocks(self, level):
8531 if level == locking.LEVEL_NODE:
8532 self._LockInstancesNodes()
8534 def CheckPrereq(self):
8535 """Check prerequisites.
8537 This only checks the optional instance list against the existing names.
8540 if self.wanted_names is None:
8541 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8543 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8544 in self.wanted_names]
8546 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8547 """Returns the status of a block device
8550 if self.op.static or not node:
8553 self.cfg.SetDiskID(dev, node)
8555 result = self.rpc.call_blockdev_find(node, dev)
8559 result.Raise("Can't compute disk status for %s" % instance_name)
8561 status = result.payload
8565 return (status.dev_path, status.major, status.minor,
8566 status.sync_percent, status.estimated_time,
8567 status.is_degraded, status.ldisk_status)
8569 def _ComputeDiskStatus(self, instance, snode, dev):
8570 """Compute block device status.
8573 if dev.dev_type in constants.LDS_DRBD:
8574 # we change the snode then (otherwise we use the one passed in)
8575 if dev.logical_id[0] == instance.primary_node:
8576 snode = dev.logical_id[1]
8578 snode = dev.logical_id[0]
8580 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8582 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8585 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8586 for child in dev.children]
8591 "iv_name": dev.iv_name,
8592 "dev_type": dev.dev_type,
8593 "logical_id": dev.logical_id,
8594 "physical_id": dev.physical_id,
8595 "pstatus": dev_pstatus,
8596 "sstatus": dev_sstatus,
8597 "children": dev_children,
8604 def Exec(self, feedback_fn):
8605 """Gather and return data"""
8608 cluster = self.cfg.GetClusterInfo()
8610 for instance in self.wanted_instances:
8611 if not self.op.static:
8612 remote_info = self.rpc.call_instance_info(instance.primary_node,
8614 instance.hypervisor)
8615 remote_info.Raise("Error checking node %s" % instance.primary_node)
8616 remote_info = remote_info.payload
8617 if remote_info and "state" in remote_info:
8620 remote_state = "down"
8623 if instance.admin_up:
8626 config_state = "down"
8628 disks = [self._ComputeDiskStatus(instance, None, device)
8629 for device in instance.disks]
8632 "name": instance.name,
8633 "config_state": config_state,
8634 "run_state": remote_state,
8635 "pnode": instance.primary_node,
8636 "snodes": instance.secondary_nodes,
8638 # this happens to be the same format used for hooks
8639 "nics": _NICListToTuple(self, instance.nics),
8640 "disk_template": instance.disk_template,
8642 "hypervisor": instance.hypervisor,
8643 "network_port": instance.network_port,
8644 "hv_instance": instance.hvparams,
8645 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8646 "be_instance": instance.beparams,
8647 "be_actual": cluster.FillBE(instance),
8648 "os_instance": instance.osparams,
8649 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8650 "serial_no": instance.serial_no,
8651 "mtime": instance.mtime,
8652 "ctime": instance.ctime,
8653 "uuid": instance.uuid,
8656 result[instance.name] = idict
8661 class LUSetInstanceParams(LogicalUnit):
8662 """Modifies an instances's parameters.
8665 HPATH = "instance-modify"
8666 HTYPE = constants.HTYPE_INSTANCE
8669 ("nics", _EmptyList, _TList),
8670 ("disks", _EmptyList, _TList),
8671 ("beparams", _EmptyDict, _TDict),
8672 ("hvparams", _EmptyDict, _TDict),
8673 ("disk_template", None, _TMaybeString),
8674 ("remote_node", None, _TMaybeString),
8675 ("os_name", None, _TMaybeString),
8676 ("force_variant", False, _TBool),
8677 ("osparams", None, _TOr(_TDict, _TNone)),
8682 def CheckArguments(self):
8683 if not (self.op.nics or self.op.disks or self.op.disk_template or
8684 self.op.hvparams or self.op.beparams or self.op.os_name):
8685 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8687 if self.op.hvparams:
8688 _CheckGlobalHvParams(self.op.hvparams)
8692 for disk_op, disk_dict in self.op.disks:
8693 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8694 if disk_op == constants.DDM_REMOVE:
8697 elif disk_op == constants.DDM_ADD:
8700 if not isinstance(disk_op, int):
8701 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8702 if not isinstance(disk_dict, dict):
8703 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8704 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8706 if disk_op == constants.DDM_ADD:
8707 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8708 if mode not in constants.DISK_ACCESS_SET:
8709 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8711 size = disk_dict.get('size', None)
8713 raise errors.OpPrereqError("Required disk parameter size missing",
8717 except (TypeError, ValueError), err:
8718 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8719 str(err), errors.ECODE_INVAL)
8720 disk_dict['size'] = size
8722 # modification of disk
8723 if 'size' in disk_dict:
8724 raise errors.OpPrereqError("Disk size change not possible, use"
8725 " grow-disk", errors.ECODE_INVAL)
8727 if disk_addremove > 1:
8728 raise errors.OpPrereqError("Only one disk add or remove operation"
8729 " supported at a time", errors.ECODE_INVAL)
8731 if self.op.disks and self.op.disk_template is not None:
8732 raise errors.OpPrereqError("Disk template conversion and other disk"
8733 " changes not supported at the same time",
8736 if self.op.disk_template:
8737 _CheckDiskTemplate(self.op.disk_template)
8738 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8739 self.op.remote_node is None):
8740 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8741 " one requires specifying a secondary node",
8746 for nic_op, nic_dict in self.op.nics:
8747 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8748 if nic_op == constants.DDM_REMOVE:
8751 elif nic_op == constants.DDM_ADD:
8754 if not isinstance(nic_op, int):
8755 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8756 if not isinstance(nic_dict, dict):
8757 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8758 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8760 # nic_dict should be a dict
8761 nic_ip = nic_dict.get('ip', None)
8762 if nic_ip is not None:
8763 if nic_ip.lower() == constants.VALUE_NONE:
8764 nic_dict['ip'] = None
8766 if not netutils.IsValidIP4(nic_ip):
8767 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8770 nic_bridge = nic_dict.get('bridge', None)
8771 nic_link = nic_dict.get('link', None)
8772 if nic_bridge and nic_link:
8773 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8774 " at the same time", errors.ECODE_INVAL)
8775 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8776 nic_dict['bridge'] = None
8777 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8778 nic_dict['link'] = None
8780 if nic_op == constants.DDM_ADD:
8781 nic_mac = nic_dict.get('mac', None)
8783 nic_dict['mac'] = constants.VALUE_AUTO
8785 if 'mac' in nic_dict:
8786 nic_mac = nic_dict['mac']
8787 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8788 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8790 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8791 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8792 " modifying an existing nic",
8795 if nic_addremove > 1:
8796 raise errors.OpPrereqError("Only one NIC add or remove operation"
8797 " supported at a time", errors.ECODE_INVAL)
8799 def ExpandNames(self):
8800 self._ExpandAndLockInstance()
8801 self.needed_locks[locking.LEVEL_NODE] = []
8802 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8804 def DeclareLocks(self, level):
8805 if level == locking.LEVEL_NODE:
8806 self._LockInstancesNodes()
8807 if self.op.disk_template and self.op.remote_node:
8808 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8809 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8811 def BuildHooksEnv(self):
8814 This runs on the master, primary and secondaries.
8818 if constants.BE_MEMORY in self.be_new:
8819 args['memory'] = self.be_new[constants.BE_MEMORY]
8820 if constants.BE_VCPUS in self.be_new:
8821 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8822 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8823 # information at all.
8826 nic_override = dict(self.op.nics)
8827 for idx, nic in enumerate(self.instance.nics):
8828 if idx in nic_override:
8829 this_nic_override = nic_override[idx]
8831 this_nic_override = {}
8832 if 'ip' in this_nic_override:
8833 ip = this_nic_override['ip']
8836 if 'mac' in this_nic_override:
8837 mac = this_nic_override['mac']
8840 if idx in self.nic_pnew:
8841 nicparams = self.nic_pnew[idx]
8843 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8844 mode = nicparams[constants.NIC_MODE]
8845 link = nicparams[constants.NIC_LINK]
8846 args['nics'].append((ip, mac, mode, link))
8847 if constants.DDM_ADD in nic_override:
8848 ip = nic_override[constants.DDM_ADD].get('ip', None)
8849 mac = nic_override[constants.DDM_ADD]['mac']
8850 nicparams = self.nic_pnew[constants.DDM_ADD]
8851 mode = nicparams[constants.NIC_MODE]
8852 link = nicparams[constants.NIC_LINK]
8853 args['nics'].append((ip, mac, mode, link))
8854 elif constants.DDM_REMOVE in nic_override:
8855 del args['nics'][-1]
8857 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8858 if self.op.disk_template:
8859 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8860 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8863 def CheckPrereq(self):
8864 """Check prerequisites.
8866 This only checks the instance list against the existing names.
8869 # checking the new params on the primary/secondary nodes
8871 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8872 cluster = self.cluster = self.cfg.GetClusterInfo()
8873 assert self.instance is not None, \
8874 "Cannot retrieve locked instance %s" % self.op.instance_name
8875 pnode = instance.primary_node
8876 nodelist = list(instance.all_nodes)
8879 if self.op.os_name and not self.op.force:
8880 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8881 self.op.force_variant)
8882 instance_os = self.op.os_name
8884 instance_os = instance.os
8886 if self.op.disk_template:
8887 if instance.disk_template == self.op.disk_template:
8888 raise errors.OpPrereqError("Instance already has disk template %s" %
8889 instance.disk_template, errors.ECODE_INVAL)
8891 if (instance.disk_template,
8892 self.op.disk_template) not in self._DISK_CONVERSIONS:
8893 raise errors.OpPrereqError("Unsupported disk template conversion from"
8894 " %s to %s" % (instance.disk_template,
8895 self.op.disk_template),
8897 _CheckInstanceDown(self, instance, "cannot change disk template")
8898 if self.op.disk_template in constants.DTS_NET_MIRROR:
8899 if self.op.remote_node == pnode:
8900 raise errors.OpPrereqError("Given new secondary node %s is the same"
8901 " as the primary node of the instance" %
8902 self.op.remote_node, errors.ECODE_STATE)
8903 _CheckNodeOnline(self, self.op.remote_node)
8904 _CheckNodeNotDrained(self, self.op.remote_node)
8905 disks = [{"size": d.size} for d in instance.disks]
8906 required = _ComputeDiskSize(self.op.disk_template, disks)
8907 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8909 # hvparams processing
8910 if self.op.hvparams:
8911 hv_type = instance.hypervisor
8912 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8913 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8914 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8917 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8918 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8919 self.hv_new = hv_new # the new actual values
8920 self.hv_inst = i_hvdict # the new dict (without defaults)
8922 self.hv_new = self.hv_inst = {}
8924 # beparams processing
8925 if self.op.beparams:
8926 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8928 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8929 be_new = cluster.SimpleFillBE(i_bedict)
8930 self.be_new = be_new # the new actual values
8931 self.be_inst = i_bedict # the new dict (without defaults)
8933 self.be_new = self.be_inst = {}
8935 # osparams processing
8936 if self.op.osparams:
8937 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8938 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8939 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8940 self.os_inst = i_osdict # the new dict (without defaults)
8942 self.os_new = self.os_inst = {}
8946 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8947 mem_check_list = [pnode]
8948 if be_new[constants.BE_AUTO_BALANCE]:
8949 # either we changed auto_balance to yes or it was from before
8950 mem_check_list.extend(instance.secondary_nodes)
8951 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8952 instance.hypervisor)
8953 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8954 instance.hypervisor)
8955 pninfo = nodeinfo[pnode]
8956 msg = pninfo.fail_msg
8958 # Assume the primary node is unreachable and go ahead
8959 self.warn.append("Can't get info from primary node %s: %s" %
8961 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8962 self.warn.append("Node data from primary node %s doesn't contain"
8963 " free memory information" % pnode)
8964 elif instance_info.fail_msg:
8965 self.warn.append("Can't get instance runtime information: %s" %
8966 instance_info.fail_msg)
8968 if instance_info.payload:
8969 current_mem = int(instance_info.payload['memory'])
8971 # Assume instance not running
8972 # (there is a slight race condition here, but it's not very probable,
8973 # and we have no other way to check)
8975 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8976 pninfo.payload['memory_free'])
8978 raise errors.OpPrereqError("This change will prevent the instance"
8979 " from starting, due to %d MB of memory"
8980 " missing on its primary node" % miss_mem,
8983 if be_new[constants.BE_AUTO_BALANCE]:
8984 for node, nres in nodeinfo.items():
8985 if node not in instance.secondary_nodes:
8989 self.warn.append("Can't get info from secondary node %s: %s" %
8991 elif not isinstance(nres.payload.get('memory_free', None), int):
8992 self.warn.append("Secondary node %s didn't return free"
8993 " memory information" % node)
8994 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8995 self.warn.append("Not enough memory to failover instance to"
8996 " secondary node %s" % node)
9001 for nic_op, nic_dict in self.op.nics:
9002 if nic_op == constants.DDM_REMOVE:
9003 if not instance.nics:
9004 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9007 if nic_op != constants.DDM_ADD:
9009 if not instance.nics:
9010 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9011 " no NICs" % nic_op,
9013 if nic_op < 0 or nic_op >= len(instance.nics):
9014 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9016 (nic_op, len(instance.nics) - 1),
9018 old_nic_params = instance.nics[nic_op].nicparams
9019 old_nic_ip = instance.nics[nic_op].ip
9024 update_params_dict = dict([(key, nic_dict[key])
9025 for key in constants.NICS_PARAMETERS
9026 if key in nic_dict])
9028 if 'bridge' in nic_dict:
9029 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9031 new_nic_params = _GetUpdatedParams(old_nic_params,
9033 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9034 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9035 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9036 self.nic_pinst[nic_op] = new_nic_params
9037 self.nic_pnew[nic_op] = new_filled_nic_params
9038 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9040 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9041 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9042 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9044 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9046 self.warn.append(msg)
9048 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9049 if new_nic_mode == constants.NIC_MODE_ROUTED:
9050 if 'ip' in nic_dict:
9051 nic_ip = nic_dict['ip']
9055 raise errors.OpPrereqError('Cannot set the nic ip to None'
9056 ' on a routed nic', errors.ECODE_INVAL)
9057 if 'mac' in nic_dict:
9058 nic_mac = nic_dict['mac']
9060 raise errors.OpPrereqError('Cannot set the nic mac to None',
9062 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9063 # otherwise generate the mac
9064 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9066 # or validate/reserve the current one
9068 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9069 except errors.ReservationError:
9070 raise errors.OpPrereqError("MAC address %s already in use"
9071 " in cluster" % nic_mac,
9072 errors.ECODE_NOTUNIQUE)
9075 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9076 raise errors.OpPrereqError("Disk operations not supported for"
9077 " diskless instances",
9079 for disk_op, _ in self.op.disks:
9080 if disk_op == constants.DDM_REMOVE:
9081 if len(instance.disks) == 1:
9082 raise errors.OpPrereqError("Cannot remove the last disk of"
9083 " an instance", errors.ECODE_INVAL)
9084 _CheckInstanceDown(self, instance, "cannot remove disks")
9086 if (disk_op == constants.DDM_ADD and
9087 len(instance.disks) >= constants.MAX_DISKS):
9088 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9089 " add more" % constants.MAX_DISKS,
9091 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9093 if disk_op < 0 or disk_op >= len(instance.disks):
9094 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9096 (disk_op, len(instance.disks)),
9101 def _ConvertPlainToDrbd(self, feedback_fn):
9102 """Converts an instance from plain to drbd.
9105 feedback_fn("Converting template to drbd")
9106 instance = self.instance
9107 pnode = instance.primary_node
9108 snode = self.op.remote_node
9110 # create a fake disk info for _GenerateDiskTemplate
9111 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9112 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9113 instance.name, pnode, [snode],
9114 disk_info, None, None, 0)
9115 info = _GetInstanceInfoText(instance)
9116 feedback_fn("Creating aditional volumes...")
9117 # first, create the missing data and meta devices
9118 for disk in new_disks:
9119 # unfortunately this is... not too nice
9120 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9122 for child in disk.children:
9123 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9124 # at this stage, all new LVs have been created, we can rename the
9126 feedback_fn("Renaming original volumes...")
9127 rename_list = [(o, n.children[0].logical_id)
9128 for (o, n) in zip(instance.disks, new_disks)]
9129 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9130 result.Raise("Failed to rename original LVs")
9132 feedback_fn("Initializing DRBD devices...")
9133 # all child devices are in place, we can now create the DRBD devices
9134 for disk in new_disks:
9135 for node in [pnode, snode]:
9136 f_create = node == pnode
9137 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9139 # at this point, the instance has been modified
9140 instance.disk_template = constants.DT_DRBD8
9141 instance.disks = new_disks
9142 self.cfg.Update(instance, feedback_fn)
9144 # disks are created, waiting for sync
9145 disk_abort = not _WaitForSync(self, instance)
9147 raise errors.OpExecError("There are some degraded disks for"
9148 " this instance, please cleanup manually")
9150 def _ConvertDrbdToPlain(self, feedback_fn):
9151 """Converts an instance from drbd to plain.
9154 instance = self.instance
9155 assert len(instance.secondary_nodes) == 1
9156 pnode = instance.primary_node
9157 snode = instance.secondary_nodes[0]
9158 feedback_fn("Converting template to plain")
9160 old_disks = instance.disks
9161 new_disks = [d.children[0] for d in old_disks]
9163 # copy over size and mode
9164 for parent, child in zip(old_disks, new_disks):
9165 child.size = parent.size
9166 child.mode = parent.mode
9168 # update instance structure
9169 instance.disks = new_disks
9170 instance.disk_template = constants.DT_PLAIN
9171 self.cfg.Update(instance, feedback_fn)
9173 feedback_fn("Removing volumes on the secondary node...")
9174 for disk in old_disks:
9175 self.cfg.SetDiskID(disk, snode)
9176 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9178 self.LogWarning("Could not remove block device %s on node %s,"
9179 " continuing anyway: %s", disk.iv_name, snode, msg)
9181 feedback_fn("Removing unneeded volumes on the primary node...")
9182 for idx, disk in enumerate(old_disks):
9183 meta = disk.children[1]
9184 self.cfg.SetDiskID(meta, pnode)
9185 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9187 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9188 " continuing anyway: %s", idx, pnode, msg)
9191 def Exec(self, feedback_fn):
9192 """Modifies an instance.
9194 All parameters take effect only at the next restart of the instance.
9197 # Process here the warnings from CheckPrereq, as we don't have a
9198 # feedback_fn there.
9199 for warn in self.warn:
9200 feedback_fn("WARNING: %s" % warn)
9203 instance = self.instance
9205 for disk_op, disk_dict in self.op.disks:
9206 if disk_op == constants.DDM_REMOVE:
9207 # remove the last disk
9208 device = instance.disks.pop()
9209 device_idx = len(instance.disks)
9210 for node, disk in device.ComputeNodeTree(instance.primary_node):
9211 self.cfg.SetDiskID(disk, node)
9212 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9214 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9215 " continuing anyway", device_idx, node, msg)
9216 result.append(("disk/%d" % device_idx, "remove"))
9217 elif disk_op == constants.DDM_ADD:
9219 if instance.disk_template == constants.DT_FILE:
9220 file_driver, file_path = instance.disks[0].logical_id
9221 file_path = os.path.dirname(file_path)
9223 file_driver = file_path = None
9224 disk_idx_base = len(instance.disks)
9225 new_disk = _GenerateDiskTemplate(self,
9226 instance.disk_template,
9227 instance.name, instance.primary_node,
9228 instance.secondary_nodes,
9233 instance.disks.append(new_disk)
9234 info = _GetInstanceInfoText(instance)
9236 logging.info("Creating volume %s for instance %s",
9237 new_disk.iv_name, instance.name)
9238 # Note: this needs to be kept in sync with _CreateDisks
9240 for node in instance.all_nodes:
9241 f_create = node == instance.primary_node
9243 _CreateBlockDev(self, node, instance, new_disk,
9244 f_create, info, f_create)
9245 except errors.OpExecError, err:
9246 self.LogWarning("Failed to create volume %s (%s) on"
9248 new_disk.iv_name, new_disk, node, err)
9249 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9250 (new_disk.size, new_disk.mode)))
9252 # change a given disk
9253 instance.disks[disk_op].mode = disk_dict['mode']
9254 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9256 if self.op.disk_template:
9257 r_shut = _ShutdownInstanceDisks(self, instance)
9259 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9260 " proceed with disk template conversion")
9261 mode = (instance.disk_template, self.op.disk_template)
9263 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9265 self.cfg.ReleaseDRBDMinors(instance.name)
9267 result.append(("disk_template", self.op.disk_template))
9270 for nic_op, nic_dict in self.op.nics:
9271 if nic_op == constants.DDM_REMOVE:
9272 # remove the last nic
9273 del instance.nics[-1]
9274 result.append(("nic.%d" % len(instance.nics), "remove"))
9275 elif nic_op == constants.DDM_ADD:
9276 # mac and bridge should be set, by now
9277 mac = nic_dict['mac']
9278 ip = nic_dict.get('ip', None)
9279 nicparams = self.nic_pinst[constants.DDM_ADD]
9280 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9281 instance.nics.append(new_nic)
9282 result.append(("nic.%d" % (len(instance.nics) - 1),
9283 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9284 (new_nic.mac, new_nic.ip,
9285 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9286 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9289 for key in 'mac', 'ip':
9291 setattr(instance.nics[nic_op], key, nic_dict[key])
9292 if nic_op in self.nic_pinst:
9293 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9294 for key, val in nic_dict.iteritems():
9295 result.append(("nic.%s/%d" % (key, nic_op), val))
9298 if self.op.hvparams:
9299 instance.hvparams = self.hv_inst
9300 for key, val in self.op.hvparams.iteritems():
9301 result.append(("hv/%s" % key, val))
9304 if self.op.beparams:
9305 instance.beparams = self.be_inst
9306 for key, val in self.op.beparams.iteritems():
9307 result.append(("be/%s" % key, val))
9311 instance.os = self.op.os_name
9314 if self.op.osparams:
9315 instance.osparams = self.os_inst
9316 for key, val in self.op.osparams.iteritems():
9317 result.append(("os/%s" % key, val))
9319 self.cfg.Update(instance, feedback_fn)
9323 _DISK_CONVERSIONS = {
9324 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9325 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9329 class LUQueryExports(NoHooksLU):
9330 """Query the exports list
9334 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9335 ("use_locking", False, _TBool),
9339 def ExpandNames(self):
9340 self.needed_locks = {}
9341 self.share_locks[locking.LEVEL_NODE] = 1
9342 if not self.op.nodes:
9343 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9345 self.needed_locks[locking.LEVEL_NODE] = \
9346 _GetWantedNodes(self, self.op.nodes)
9348 def Exec(self, feedback_fn):
9349 """Compute the list of all the exported system images.
9352 @return: a dictionary with the structure node->(export-list)
9353 where export-list is a list of the instances exported on
9357 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9358 rpcresult = self.rpc.call_export_list(self.nodes)
9360 for node in rpcresult:
9361 if rpcresult[node].fail_msg:
9362 result[node] = False
9364 result[node] = rpcresult[node].payload
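# The result is therefore a dict mapping each node name either to the list
# of exports found on it, or to False when the export-list RPC failed.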
9369 class LUPrepareExport(NoHooksLU):
9370 """Prepares an instance for an export and returns useful information.
9375 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9379 def ExpandNames(self):
9380 self._ExpandAndLockInstance()
9382 def CheckPrereq(self):
9383 """Check prerequisites.
9386 instance_name = self.op.instance_name
9388 self.instance = self.cfg.GetInstanceInfo(instance_name)
9389 assert self.instance is not None, \
9390 "Cannot retrieve locked instance %s" % self.op.instance_name
9391 _CheckNodeOnline(self, self.instance.primary_node)
9393 self._cds = _GetClusterDomainSecret()
9395 def Exec(self, feedback_fn):
9396 """Prepares an instance for an export.
9399 instance = self.instance
9401 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9402 salt = utils.GenerateSecret(8)
9404 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9405 result = self.rpc.call_x509_cert_create(instance.primary_node,
9406 constants.RIE_CERT_VALIDITY)
9407 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9409 (name, cert_pem) = result.payload
9411 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9415 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9416 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9418 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9424 class LUExportInstance(LogicalUnit):
9425 """Export an instance to an image in the cluster.
9428 HPATH = "instance-export"
9429 HTYPE = constants.HTYPE_INSTANCE
9432 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9433 ("shutdown", True, _TBool),
9435 ("remove_instance", False, _TBool),
9436 ("ignore_remove_failures", False, _TBool),
9437 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9438 ("x509_key_name", None, _TOr(_TList, _TNone)),
9439 ("destination_x509_ca", None, _TMaybeString),
9443 def CheckArguments(self):
9444 """Check the arguments.
9447 self.x509_key_name = self.op.x509_key_name
9448 self.dest_x509_ca_pem = self.op.destination_x509_ca
9450 if self.op.remove_instance and not self.op.shutdown:
9451 raise errors.OpPrereqError("Can not remove instance without shutting it"
9454 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9455 if not self.x509_key_name:
9456 raise errors.OpPrereqError("Missing X509 key name for encryption",
9459 if not self.dest_x509_ca_pem:
9460 raise errors.OpPrereqError("Missing destination X509 CA",
9463 def ExpandNames(self):
9464 self._ExpandAndLockInstance()
9466 # Lock all nodes for local exports
9467 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9468 # FIXME: lock only instance primary and destination node
9470 # Sad but true, for now we have to lock all nodes, as we don't know where
9471 # the previous export might be, and in this LU we search for it and
9472 # remove it from its current node. In the future we could fix this by:
9473 # - making a tasklet to search (share-lock all), then create the
9474 # new one, then one to remove, after
9475 # - removing the removal operation altogether
9476 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9478 def DeclareLocks(self, level):
9479 """Last minute lock declaration."""
9480 # All nodes are locked anyway, so nothing to do here.
9482 def BuildHooksEnv(self):
9485 This will run on the master, primary node and target node.
9489 "EXPORT_MODE": self.op.mode,
9490 "EXPORT_NODE": self.op.target_node,
9491 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9492 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9493 # TODO: Generic function for boolean env variables
9494 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9497 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9499 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9501 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9502 nl.append(self.op.target_node)
9506 def CheckPrereq(self):
9507 """Check prerequisites.
9509 This checks that the instance and node names are valid.
9512 instance_name = self.op.instance_name
9514 self.instance = self.cfg.GetInstanceInfo(instance_name)
9515 assert self.instance is not None, \
9516 "Cannot retrieve locked instance %s" % self.op.instance_name
9517 _CheckNodeOnline(self, self.instance.primary_node)
9519 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9520 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9521 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9522 assert self.dst_node is not None
9524 _CheckNodeOnline(self, self.dst_node.name)
9525 _CheckNodeNotDrained(self, self.dst_node.name)
9528 self.dest_disk_info = None
9529 self.dest_x509_ca = None
9531 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9532 self.dst_node = None
9534 if len(self.op.target_node) != len(self.instance.disks):
9535 raise errors.OpPrereqError(("Received destination information for %s"
9536 " disks, but instance %s has %s disks") %
9537 (len(self.op.target_node), instance_name,
9538 len(self.instance.disks)),
9541 cds = _GetClusterDomainSecret()
9543 # Check X509 key name
9545 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9546 except (TypeError, ValueError), err:
9547 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9549 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9550 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9553 # Load and verify CA
9555 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9556 except OpenSSL.crypto.Error, err:
9557 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9558 (err, ), errors.ECODE_INVAL)
9560 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9561 if errcode is not None:
9562 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9563 (msg, ), errors.ECODE_INVAL)
9565 self.dest_x509_ca = cert
9567 # Verify target information
9569 for idx, disk_data in enumerate(self.op.target_node):
9571 (host, port, magic) = \
9572 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9573 except errors.GenericError, err:
9574 raise errors.OpPrereqError("Target info for disk %s: %s" %
9575 (idx, err), errors.ECODE_INVAL)
9577 disk_info.append((host, port, magic))
9579 assert len(disk_info) == len(self.op.target_node)
9580 self.dest_disk_info = disk_info
9583 raise errors.ProgrammerError("Unhandled export mode %r" %
9586 # instance disk type verification
9587 # TODO: Implement export support for file-based disks
9588 for disk in self.instance.disks:
9589 if disk.dev_type == constants.LD_FILE:
9590 raise errors.OpPrereqError("Export not supported for instances with"
9591 " file-based disks", errors.ECODE_INVAL)
9593 def _CleanupExports(self, feedback_fn):
9594 """Removes exports of current instance from all other nodes.
9596 If an instance in a cluster with nodes A..D was exported to node C, its
9597 exports will be removed from the nodes A, B and D.
9600 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9602 nodelist = self.cfg.GetNodeList()
9603 nodelist.remove(self.dst_node.name)
9605 # on one-node clusters nodelist will be empty after the removal
9606 # if we proceed the backup would be removed because OpQueryExports
9607 # substitutes an empty list with the full cluster node list.
9608 iname = self.instance.name
9610 feedback_fn("Removing old exports for instance %s" % iname)
9611 exportlist = self.rpc.call_export_list(nodelist)
9612 for node in exportlist:
9613 if exportlist[node].fail_msg:
9615 if iname in exportlist[node].payload:
9616 msg = self.rpc.call_export_remove(node, iname).fail_msg
9618 self.LogWarning("Could not remove older export for instance %s"
9619 " on node %s: %s", iname, node, msg)
9621 def Exec(self, feedback_fn):
9622 """Export an instance to an image in the cluster.
9625 assert self.op.mode in constants.EXPORT_MODES
9627 instance = self.instance
9628 src_node = instance.primary_node
9630 if self.op.shutdown:
9631 # shutdown the instance, but not the disks
9632 feedback_fn("Shutting down instance %s" % instance.name)
9633 result = self.rpc.call_instance_shutdown(src_node, instance,
9634 self.op.shutdown_timeout)
9635 # TODO: Maybe ignore failures if ignore_remove_failures is set
9636 result.Raise("Could not shutdown instance %s on"
9637 " node %s" % (instance.name, src_node))
9639 # set the disks ID correctly since call_instance_start needs the
9640 # correct drbd minor to create the symlinks
9641 for disk in instance.disks:
9642 self.cfg.SetDiskID(disk, src_node)
9644 activate_disks = (not instance.admin_up)
9647       # Activate the instance disks if we're exporting a stopped instance
9648 feedback_fn("Activating disks for %s" % instance.name)
9649 _StartInstanceDisks(self, instance, None)
9652 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9655 helper.CreateSnapshots()
9657 if (self.op.shutdown and instance.admin_up and
9658 not self.op.remove_instance):
9659 assert not activate_disks
9660 feedback_fn("Starting instance %s" % instance.name)
9661 result = self.rpc.call_instance_start(src_node, instance, None, None)
9662 msg = result.fail_msg
9664 feedback_fn("Failed to start instance: %s" % msg)
9665 _ShutdownInstanceDisks(self, instance)
9666 raise errors.OpExecError("Could not start instance: %s" % msg)
9668 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9669 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9670 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9671 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9672 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9674 (key_name, _, _) = self.x509_key_name
9677 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9680 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9681 key_name, dest_ca_pem,
9686 # Check for backwards compatibility
9687 assert len(dresults) == len(instance.disks)
9688 assert compat.all(isinstance(i, bool) for i in dresults), \
9689 "Not all results are boolean: %r" % dresults
9693 feedback_fn("Deactivating disks for %s" % instance.name)
9694 _ShutdownInstanceDisks(self, instance)
9696 if not (compat.all(dresults) and fin_resu):
9699 failures.append("export finalization")
9700 if not compat.all(dresults):
9701 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9703 failures.append("disk export: disk(s) %s" % fdsk)
9705 raise errors.OpExecError("Export failed, errors in %s" %
9706 utils.CommaJoin(failures))
9708 # At this point, the export was successful, we can cleanup/finish
9710 # Remove instance if requested
9711 if self.op.remove_instance:
9712 feedback_fn("Removing instance %s" % instance.name)
9713 _RemoveInstance(self, feedback_fn, instance,
9714 self.op.ignore_remove_failures)
9716 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9717 self._CleanupExports(feedback_fn)
9719 return fin_resu, dresults
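# Illustrative sketch (not part of the original module): the error handling at
# the end of Exec above collects per-disk failures and the finalization status
# into one message.  The helper below mirrors that logic with plain built-ins
# (the real code uses compat.all and utils.CommaJoin); the name is made up.
def _ExampleSummarizeExport(fin_resu, dresults):
  """Sketch: return None on success, otherwise a failure summary string.

  """
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  if not all(dresults):
    fdsk = ", ".join(str(idx) for (idx, ok) in enumerate(dresults) if not ok)
    failures.append("disk export: disk(s) %s" % fdsk)
  if failures:
    return "Export failed, errors in %s" % ", ".join(failures)
  return None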
9722 class LURemoveExport(NoHooksLU):
9723 """Remove exports related to the named instance.
9731 def ExpandNames(self):
9732 self.needed_locks = {}
9733 # We need all nodes to be locked in order for RemoveExport to work, but we
9734 # don't need to lock the instance itself, as nothing will happen to it (and
9735 # we can remove exports also for a removed instance)
9736 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9738 def Exec(self, feedback_fn):
9739 """Remove any export.
9742 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9743 # If the instance was not found we'll try with the name that was passed in.
9744 # This will only work if it was an FQDN, though.
9746 if not instance_name:
9748 instance_name = self.op.instance_name
9750 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9751 exportlist = self.rpc.call_export_list(locked_nodes)
9753 for node in exportlist:
9754 msg = exportlist[node].fail_msg
9756 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9758 if instance_name in exportlist[node].payload:
9760 result = self.rpc.call_export_remove(node, instance_name)
9761 msg = result.fail_msg
9763 logging.error("Could not remove export for instance %s"
9764 " on node %s: %s", instance_name, node, msg)
9766 if fqdn_warn and not found:
9767 feedback_fn("Export not found. If trying to remove an export belonging"
9768                   " to a deleted instance please use its Fully Qualified"
9769                   " Domain Name.")
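# Illustrative sketch (not part of the original module): the name handling in
# LURemoveExport.Exec falls back to the user-supplied name when the instance
# is no longer in the configuration.  expand_fn is a hypothetical stand-in for
# cfg.ExpandInstanceName; the second element of the result corresponds to the
# fqdn_warn flag used above.
def _ExampleResolveExportName(expand_fn, requested_name):
  """Sketch: resolve the export name, noting whether to warn about FQDNs.

  """
  expanded = expand_fn(requested_name)
  if expanded is not None:
    return (expanded, False)
  # unknown instance: use the name verbatim, which only matches an FQDN
  return (requested_name, True)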
9772 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9775 This is an abstract class which is the parent of all the other tags LUs.
9779 def ExpandNames(self):
9780 self.needed_locks = {}
9781 if self.op.kind == constants.TAG_NODE:
9782 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9783 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9784 elif self.op.kind == constants.TAG_INSTANCE:
9785 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9786 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9788 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
9789 # not possible to acquire the BGL based on opcode parameters)
9791 def CheckPrereq(self):
9792 """Check prerequisites.
9795 if self.op.kind == constants.TAG_CLUSTER:
9796 self.target = self.cfg.GetClusterInfo()
9797 elif self.op.kind == constants.TAG_NODE:
9798 self.target = self.cfg.GetNodeInfo(self.op.name)
9799 elif self.op.kind == constants.TAG_INSTANCE:
9800 self.target = self.cfg.GetInstanceInfo(self.op.name)
9802 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9803 str(self.op.kind), errors.ECODE_INVAL)
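# Illustrative sketch (not part of the original module): the kind-to-lock
# mapping applied by TagsLU.ExpandNames above, written out as a hypothetical
# helper.  Cluster tags currently take no per-object lock (see the FIXME about
# the BGL).
def _ExampleTagLockLevel(kind):
  """Sketch: return the locking level used for a given tag kind, if any.

  """
  if kind == constants.TAG_NODE:
    return locking.LEVEL_NODE
  if kind == constants.TAG_INSTANCE:
    return locking.LEVEL_INSTANCE
  return None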
9806 class LUGetTags(TagsLU):
9807 """Returns the tags of a given object.
9811 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9812 # Name is only meaningful for nodes and instances
9813 ("name", _NoDefault, _TMaybeString),
9817 def ExpandNames(self):
9818 TagsLU.ExpandNames(self)
9820 # Share locks as this is only a read operation
9821 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9823 def Exec(self, feedback_fn):
9824 """Returns the tag list.
9827 return list(self.target.GetTags())
9830 class LUSearchTags(NoHooksLU):
9831 """Searches the tags for a given pattern.
9835 ("pattern", _NoDefault, _TNonEmptyString),
9839 def ExpandNames(self):
9840 self.needed_locks = {}
9842 def CheckPrereq(self):
9843 """Check prerequisites.
9845 This checks the pattern passed for validity by compiling it.
9849 self.re = re.compile(self.op.pattern)
9850 except re.error, err:
9851 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9852 (self.op.pattern, err), errors.ECODE_INVAL)
9854 def Exec(self, feedback_fn):
9855 """Returns the tag list.
9859 tgts = [("/cluster", cfg.GetClusterInfo())]
9860 ilist = cfg.GetAllInstancesInfo().values()
9861 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9862 nlist = cfg.GetAllNodesInfo().values()
9863 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9865 for path, target in tgts:
9866 for tag in target.GetTags():
9867 if self.re.search(tag):
9868 results.append((path, tag))
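# Illustrative sketch (not part of the original module): the search performed
# by LUSearchTags.Exec above, restated over plain data.  tagged_objects is a
# hypothetical {path: [tags]} mapping such as
# {"/cluster": ["prod"], "/nodes/node1": ["prod", "rack1"]}; "re" is already
# imported at the top of this module.
def _ExampleSearchTags(pattern, tagged_objects):
  """Sketch: return (path, tag) pairs whose tag matches the pattern.

  """
  rx = re.compile(pattern)
  results = []
  for path, tags in tagged_objects.items():
    for tag in tags:
      if rx.search(tag):
        results.append((path, tag))
  return results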
9872 class LUAddTags(TagsLU):
9873 """Sets a tag on a given object.
9877 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9878 # Name is only meaningful for nodes and instances
9879 ("name", _NoDefault, _TMaybeString),
9880 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9884 def CheckPrereq(self):
9885 """Check prerequisites.
9887 This checks the type and length of the tag name and value.
9890 TagsLU.CheckPrereq(self)
9891 for tag in self.op.tags:
9892 objects.TaggableObject.ValidateTag(tag)
9894 def Exec(self, feedback_fn):
9899 for tag in self.op.tags:
9900 self.target.AddTag(tag)
9901 except errors.TagError, err:
9902 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9903 self.cfg.Update(self.target, feedback_fn)
9906 class LUDelTags(TagsLU):
9907 """Delete a list of tags from a given object.
9911 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9912 # Name is only meaningful for nodes and instances
9913 ("name", _NoDefault, _TMaybeString),
9914 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9918 def CheckPrereq(self):
9919 """Check prerequisites.
9921 This checks that we have the given tag.
9924 TagsLU.CheckPrereq(self)
9925 for tag in self.op.tags:
9926 objects.TaggableObject.ValidateTag(tag)
9927 del_tags = frozenset(self.op.tags)
9928 cur_tags = self.target.GetTags()
9930 diff_tags = del_tags - cur_tags
9932 diff_names = ("'%s'" % i for i in sorted(diff_tags))
9933 raise errors.OpPrereqError("Tag(s) %s not found" %
9934 (utils.CommaJoin(diff_names), ),
9937 def Exec(self, feedback_fn):
9938 """Remove the tag from the object.
9941 for tag in self.op.tags:
9942 self.target.RemoveTag(tag)
9943 self.cfg.Update(self.target, feedback_fn)
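# Illustrative sketch (not part of the original module): the set arithmetic
# used by LUDelTags.CheckPrereq above to spot tags that are not present on the
# object, e.g. _ExampleMissingTags(["a", "b"], ["b", "c"]) -> frozenset(["a"]).
# The helper name is made up.
def _ExampleMissingTags(requested, current):
  """Sketch: requested tags that the object does not carry.

  """
  return frozenset(requested) - frozenset(current)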
9946 class LUTestDelay(NoHooksLU):
9947 """Sleep for a specified amount of time.
9949   This LU sleeps on the master and/or nodes for a specified amount of time.
9954 ("duration", _NoDefault, _TFloat),
9955 ("on_master", True, _TBool),
9956 ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9957 ("repeat", 0, _TPositiveInt)
9961 def ExpandNames(self):
9962 """Expand names and set required locks.
9964 This expands the node list, if any.
9967 self.needed_locks = {}
9968 if self.op.on_nodes:
9969 # _GetWantedNodes can be used here, but is not always appropriate to use
9970       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9971       # more information.
9972 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9973 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9975 def _TestDelay(self):
9976 """Do the actual sleep.
9979 if self.op.on_master:
9980 if not utils.TestDelay(self.op.duration):
9981 raise errors.OpExecError("Error during master delay test")
9982 if self.op.on_nodes:
9983 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9984 for node, node_result in result.items():
9985 node_result.Raise("Failure during rpc call to node %s" % node)
9987 def Exec(self, feedback_fn):
9988 """Execute the test delay opcode, with the wanted repetitions.
9991 if self.op.repeat == 0:
9994 top_value = self.op.repeat - 1
9995 for i in range(self.op.repeat):
9996 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10000 class LUTestJobqueue(NoHooksLU):
10001 """Utility LU to test some aspects of the job queue.
10005 ("notify_waitlock", False, _TBool),
10006 ("notify_exec", False, _TBool),
10007 ("log_messages", _EmptyList, _TListOf(_TString)),
10008 ("fail", False, _TBool),
10012 # Must be lower than default timeout for WaitForJobChange to see whether it
10013 # notices changed jobs
10014 _CLIENT_CONNECT_TIMEOUT = 20.0
10015 _CLIENT_CONFIRM_TIMEOUT = 60.0
10018 def _NotifyUsingSocket(cls, cb, errcls):
10019 """Opens a Unix socket and waits for another program to connect.
10022 @param cb: Callback to send socket name to client
10023 @type errcls: class
10024 @param errcls: Exception class to use for errors
10027 # Using a temporary directory as there's no easy way to create temporary
10028     # sockets without writing a custom loop around tempfile.mktemp and
10029     # socket.bind
10030 tmpdir = tempfile.mkdtemp()
10032 tmpsock = utils.PathJoin(tmpdir, "sock")
10034 logging.debug("Creating temporary socket at %s", tmpsock)
10035 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10040 # Send details to client
10043 # Wait for client to connect before continuing
10044 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10046 (conn, _) = sock.accept()
10047 except socket.error, err:
10048 raise errcls("Client didn't connect in time (%s)" % err)
10052 # Remove as soon as client is connected
10053 shutil.rmtree(tmpdir)
10055 # Wait for client to close
10058 # pylint: disable-msg=E1101
10059 # Instance of '_socketobject' has no ... member
10060 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10062 except socket.error, err:
10063 raise errcls("Client failed to confirm notification (%s)" % err)
10067 def _SendNotification(self, test, arg, sockname):
10068 """Sends a notification to the client.
10071 @param test: Test name
10072 @param arg: Test argument (depends on test)
10073 @type sockname: string
10074 @param sockname: Socket path
10077 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10079 def _Notify(self, prereq, test, arg):
10080 """Notifies the client of a test.
10083 @param prereq: Whether this is a prereq-phase test
10085 @param test: Test name
10086 @param arg: Test argument (depends on test)
10090 errcls = errors.OpPrereqError
10092 errcls = errors.OpExecError
10094 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10098 def CheckArguments(self):
10099 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10100 self.expandnames_calls = 0
10102 def ExpandNames(self):
10103 checkargs_calls = getattr(self, "checkargs_calls", 0)
10104 if checkargs_calls < 1:
10105 raise errors.ProgrammerError("CheckArguments was not called")
10107 self.expandnames_calls += 1
10109 if self.op.notify_waitlock:
10110 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10112 self.LogInfo("Expanding names")
10114 # Get lock on master node (just to get a lock, not for a particular reason)
10115 self.needed_locks = {
10116 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10119 def Exec(self, feedback_fn):
10120 if self.expandnames_calls < 1:
10121 raise errors.ProgrammerError("ExpandNames was not called")
10123 if self.op.notify_exec:
10124 self._Notify(False, constants.JQT_EXEC, None)
10126 self.LogInfo("Executing")
10128 if self.op.log_messages:
10129 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10130 for idx, msg in enumerate(self.op.log_messages):
10131 self.LogInfo("Sending log message %s", idx + 1)
10132 feedback_fn(constants.JQT_MSGPREFIX + msg)
10133 # Report how many test messages have been sent
10134 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10137 raise errors.OpExecError("Opcode failure was requested")
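# Illustrative sketch (not part of the original module): the counterpart
# expected by LUTestJobqueue._NotifyUsingSocket.  A hypothetical test client
# receives the socket path (via the job log message sent by _SendNotification),
# connects to it and then closes the connection, which is what the LU waits
# for.  The "socket" module is already imported at the top of this file.
def _ExampleNotificationClient(sockname):
  """Sketch: acknowledge a job queue test notification.

  """
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    sock.connect(sockname)
  finally:
    sock.close()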
10142 class IAllocator(object):
10143 """IAllocator framework.
10145   An IAllocator instance has four sets of attributes:
10146 - cfg that is needed to query the cluster
10147 - input data (all members of the _KEYS class attribute are required)
10148 - four buffer attributes (in|out_data|text), that represent the
10149 input (to the external script) in text and data structure format,
10150 and the output from it, again in two formats
10151     - the result variables from the script (success, info, result) for
10152       easy usage
10155 # pylint: disable-msg=R0902
10156 # lots of instance attributes
10158 "name", "mem_size", "disks", "disk_template",
10159 "os", "tags", "nics", "vcpus", "hypervisor",
10162 "name", "relocate_from",
10168 def __init__(self, cfg, rpc, mode, **kwargs):
10171 # init buffer variables
10172 self.in_text = self.out_text = self.in_data = self.out_data = None
10173 # init all input fields so that pylint is happy
10175 self.mem_size = self.disks = self.disk_template = None
10176 self.os = self.tags = self.nics = self.vcpus = None
10177 self.hypervisor = None
10178 self.relocate_from = None
10180 self.evac_nodes = None
10182 self.required_nodes = None
10183 # init result fields
10184 self.success = self.info = self.result = None
10185 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10186 keyset = self._ALLO_KEYS
10187 fn = self._AddNewInstance
10188 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10189 keyset = self._RELO_KEYS
10190 fn = self._AddRelocateInstance
10191 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10192 keyset = self._EVAC_KEYS
10193 fn = self._AddEvacuateNodes
10195 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10196 " IAllocator" % self.mode)
10198 if key not in keyset:
10199 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10200 " IAllocator" % key)
10201 setattr(self, key, kwargs[key])
10204 if key not in kwargs:
10205 raise errors.ProgrammerError("Missing input parameter '%s' to"
10206 " IAllocator" % key)
10207 self._BuildInputData(fn)
10209 def _ComputeClusterData(self):
10210 """Compute the generic allocator input data.
10212 This is the data that is independent of the actual operation.
10216 cluster_info = cfg.GetClusterInfo()
10219 "version": constants.IALLOCATOR_VERSION,
10220 "cluster_name": cfg.GetClusterName(),
10221 "cluster_tags": list(cluster_info.GetTags()),
10222 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10223 # we don't have job IDs
10225 iinfo = cfg.GetAllInstancesInfo().values()
10226 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10230 node_list = cfg.GetNodeList()
10232 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10233 hypervisor_name = self.hypervisor
10234 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10235 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10236 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10237 hypervisor_name = cluster_info.enabled_hypervisors[0]
10239 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10242 self.rpc.call_all_instances_info(node_list,
10243 cluster_info.enabled_hypervisors)
10244 for nname, nresult in node_data.items():
10245 # first fill in static (config-based) values
10246 ninfo = cfg.GetNodeInfo(nname)
10248 "tags": list(ninfo.GetTags()),
10249 "primary_ip": ninfo.primary_ip,
10250 "secondary_ip": ninfo.secondary_ip,
10251 "offline": ninfo.offline,
10252 "drained": ninfo.drained,
10253 "master_candidate": ninfo.master_candidate,
10256 if not (ninfo.offline or ninfo.drained):
10257 nresult.Raise("Can't get data for node %s" % nname)
10258 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10260 remote_info = nresult.payload
10262 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10263 'vg_size', 'vg_free', 'cpu_total']:
10264 if attr not in remote_info:
10265 raise errors.OpExecError("Node '%s' didn't return attribute"
10266 " '%s'" % (nname, attr))
10267 if not isinstance(remote_info[attr], int):
10268 raise errors.OpExecError("Node '%s' returned invalid value"
10270 (nname, attr, remote_info[attr]))
10271 # compute memory used by primary instances
10272 i_p_mem = i_p_up_mem = 0
10273 for iinfo, beinfo in i_list:
10274 if iinfo.primary_node == nname:
10275 i_p_mem += beinfo[constants.BE_MEMORY]
10276 if iinfo.name not in node_iinfo[nname].payload:
10279 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10280 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10281 remote_info['memory_free'] -= max(0, i_mem_diff)
10284 i_p_up_mem += beinfo[constants.BE_MEMORY]
10286 # compute memory used by instances
10288 "total_memory": remote_info['memory_total'],
10289 "reserved_memory": remote_info['memory_dom0'],
10290 "free_memory": remote_info['memory_free'],
10291 "total_disk": remote_info['vg_size'],
10292 "free_disk": remote_info['vg_free'],
10293 "total_cpus": remote_info['cpu_total'],
10294 "i_pri_memory": i_p_mem,
10295 "i_pri_up_memory": i_p_up_mem,
10297 pnr.update(pnr_dyn)
10299 node_results[nname] = pnr
10300 data["nodes"] = node_results
10304 for iinfo, beinfo in i_list:
10306 for nic in iinfo.nics:
10307 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10308 nic_dict = {"mac": nic.mac,
10310 "mode": filled_params[constants.NIC_MODE],
10311 "link": filled_params[constants.NIC_LINK],
10313 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10314 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10315 nic_data.append(nic_dict)
10317 "tags": list(iinfo.GetTags()),
10318 "admin_up": iinfo.admin_up,
10319 "vcpus": beinfo[constants.BE_VCPUS],
10320 "memory": beinfo[constants.BE_MEMORY],
10322 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10324 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10325 "disk_template": iinfo.disk_template,
10326 "hypervisor": iinfo.hypervisor,
10328 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10330 instance_data[iinfo.name] = pir
10332 data["instances"] = instance_data
10334 self.in_data = data
10336 def _AddNewInstance(self):
10337 """Add new instance data to allocator structure.
10339     This in combination with _ComputeClusterData will create the
10340     correct structure needed as input for the allocator.
10342     The checks for the completeness of the opcode must have already been done.
10346 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10348 if self.disk_template in constants.DTS_NET_MIRROR:
10349 self.required_nodes = 2
10351 self.required_nodes = 1
10354 "disk_template": self.disk_template,
10357 "vcpus": self.vcpus,
10358 "memory": self.mem_size,
10359 "disks": self.disks,
10360 "disk_space_total": disk_space,
10362 "required_nodes": self.required_nodes,
10366 def _AddRelocateInstance(self):
10367 """Add relocate instance data to allocator structure.
10369     This in combination with _ComputeClusterData will create the
10370     correct structure needed as input for the allocator.
10372     The checks for the completeness of the opcode must have already been done.
10376 instance = self.cfg.GetInstanceInfo(self.name)
10377 if instance is None:
10378 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10379 " IAllocator" % self.name)
10381 if instance.disk_template not in constants.DTS_NET_MIRROR:
10382 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10383 errors.ECODE_INVAL)
10385 if len(instance.secondary_nodes) != 1:
10386 raise errors.OpPrereqError("Instance has not exactly one secondary node",
10387 errors.ECODE_STATE)
10389 self.required_nodes = 1
10390 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10391 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10395 "disk_space_total": disk_space,
10396 "required_nodes": self.required_nodes,
10397 "relocate_from": self.relocate_from,
10401 def _AddEvacuateNodes(self):
10402 """Add evacuate nodes data to allocator structure.
10406 "evac_nodes": self.evac_nodes
10410 def _BuildInputData(self, fn):
10411 """Build input data structures.
10414 self._ComputeClusterData()
10417 request["type"] = self.mode
10418 self.in_data["request"] = request
10420 self.in_text = serializer.Dump(self.in_data)
10422 def Run(self, name, validate=True, call_fn=None):
10423 """Run an instance allocator and return the results.
10426 if call_fn is None:
10427 call_fn = self.rpc.call_iallocator_runner
10429 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10430 result.Raise("Failure while running the iallocator script")
10432 self.out_text = result.payload
10434 self._ValidateResult()
10436 def _ValidateResult(self):
10437 """Process the allocator results.
10439     This will process and, if successful, save the result in
10440 self.out_data and the other parameters.
10444 rdict = serializer.Load(self.out_text)
10445 except Exception, err:
10446 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10448 if not isinstance(rdict, dict):
10449 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10451     # TODO: remove backwards compatibility in later versions
10452 if "nodes" in rdict and "result" not in rdict:
10453 rdict["result"] = rdict["nodes"]
10456 for key in "success", "info", "result":
10457 if key not in rdict:
10458 raise errors.OpExecError("Can't parse iallocator results:"
10459 " missing key '%s'" % key)
10460 setattr(self, key, rdict[key])
10462 if not isinstance(rdict["result"], list):
10463 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10465 self.out_data = rdict
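# Illustrative sketch (not part of the original module): _ValidateResult above
# expects a dict with the keys "success", "info" and "result", converting the
# legacy "nodes" key if needed.  The hypothetical helper below restates that
# backwards-compatibility step on its own.
def _ExampleLegacyResultFixup(rdict):
  """Sketch: move a legacy "nodes" payload under the "result" key.

  """
  if "nodes" in rdict and "result" not in rdict:
    rdict["result"] = rdict.pop("nodes")
  return rdict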
10468 class LUTestAllocator(NoHooksLU):
10469 """Run allocator tests.
10471 This LU runs the allocator tests
10475 ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10476 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10477 ("name", _NoDefault, _TNonEmptyString),
10478 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10479 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10480 _TOr(_TNone, _TNonEmptyString))))),
10481 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10482 ("hypervisor", None, _TMaybeString),
10483 ("allocator", None, _TMaybeString),
10484 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10485 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10486 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10487 ("os", None, _TMaybeString),
10488 ("disk_template", None, _TMaybeString),
10489 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10492 def CheckPrereq(self):
10493 """Check prerequisites.
10495     This checks the opcode parameters depending on the direction and mode test.
10498 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10499 for attr in ["mem_size", "disks", "disk_template",
10500 "os", "tags", "nics", "vcpus"]:
10501 if not hasattr(self.op, attr):
10502 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10503 attr, errors.ECODE_INVAL)
10504 iname = self.cfg.ExpandInstanceName(self.op.name)
10505 if iname is not None:
10506 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10507 iname, errors.ECODE_EXISTS)
10508 if not isinstance(self.op.nics, list):
10509 raise errors.OpPrereqError("Invalid parameter 'nics'",
10510 errors.ECODE_INVAL)
10511 if not isinstance(self.op.disks, list):
10512 raise errors.OpPrereqError("Invalid parameter 'disks'",
10513 errors.ECODE_INVAL)
10514 for row in self.op.disks:
10515 if (not isinstance(row, dict) or
10516 "size" not in row or
10517 not isinstance(row["size"], int) or
10518 "mode" not in row or
10519 row["mode"] not in ['r', 'w']):
10520 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10521 " parameter", errors.ECODE_INVAL)
10522 if self.op.hypervisor is None:
10523 self.op.hypervisor = self.cfg.GetHypervisorType()
10524 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10525 fname = _ExpandInstanceName(self.cfg, self.op.name)
10526 self.op.name = fname
10527 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10528 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10529 if not hasattr(self.op, "evac_nodes"):
10530 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10531 " opcode input", errors.ECODE_INVAL)
10533 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10534 self.op.mode, errors.ECODE_INVAL)
10536 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10537 if self.op.allocator is None:
10538 raise errors.OpPrereqError("Missing allocator name",
10539 errors.ECODE_INVAL)
10540 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10541 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10542 self.op.direction, errors.ECODE_INVAL)
10544 def Exec(self, feedback_fn):
10545 """Run the allocator test.
10548 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10549 ial = IAllocator(self.cfg, self.rpc,
10552 mem_size=self.op.mem_size,
10553 disks=self.op.disks,
10554 disk_template=self.op.disk_template,
10558 vcpus=self.op.vcpus,
10559 hypervisor=self.op.hypervisor,
10561 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10562 ial = IAllocator(self.cfg, self.rpc,
10565 relocate_from=list(self.relocate_from),
10567 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10568 ial = IAllocator(self.cfg, self.rpc,
10570 evac_nodes=self.op.evac_nodes)
10572       raise errors.ProgrammerError("Uncaught mode %s in"
10573                                    " LUTestAllocator.Exec" % self.op.mode)
10575 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10576 result = ial.in_text
10578 ial.Run(self.op.allocator, validate=False)
10579 result = ial.out_text
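# Illustrative sketch (not part of the original module): how a logical unit
# drives the IAllocator framework, mirroring LUTestAllocator.Exec above.  The
# "lu" argument stands for any LogicalUnit (providing cfg and rpc), and the
# instance parameters are made-up example values.
def _ExampleRunAllocator(lu, allocator_name):
  """Sketch: ask an external allocator to place a small new instance.

  """
  ial = IAllocator(lu.cfg, lu.rpc,
                   mode=constants.IALLOCATOR_MODE_ALLOC,
                   name="new-instance.example.com",
                   mem_size=512,
                   disks=[{"size": 1024, "mode": "w"}],
                   disk_template=constants.DT_PLAIN,
                   os="debootstrap",
                   tags=[],
                   nics=[{"mac": "auto", "ip": None, "bridge": None}],
                   vcpus=1,
                   hypervisor=constants.HT_XEN_PVM)
  ial.Run(allocator_name)
  return (ial.success, ial.info, ial.result)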