4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 import ganeti.masterd.instance # pylint: disable-msg=W0611
60 # Modifiable default values; need to define these here before the
64 """Returns an empty list.
71 """Returns an empty dict.
77 #: The without-default default value
81 #: The no-type (value too complex to check in the type system)
87 """Checks if the given value is not None.
90 return val is not None
94 """Checks if the given value is None.
101 """Checks if the given value is a boolean.
104 return isinstance(val, bool)
108 """Checks if the given value is an integer.
111 return isinstance(val, int)
115 """Checks if the given value is a float.
118 return isinstance(val, float)
122 """Checks if the given value is a string.
125 return isinstance(val, basestring)
129 """Checks if a given value evaluates to a boolean True value.
135 def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list.
139 return lambda val: val in target_list
144 """Checks if the given value is a list.
147 return isinstance(val, list)
151 """Checks if the given value is a dictionary.
154 return isinstance(val, dict)
157 def _TIsLength(size):
158 """Check if the given container is of the given size.
161 return lambda container: len(container) == size
166 """Combine multiple functions using an AND operation.
170 return compat.all(t(val) for t in args)
175 """Combine multiple functions using an OR operation.
179 return compat.any(t(val) for t in args)
184 """Checks that a modified version of the argument passes the given test.
187 return lambda val: test(fn(val))
192 #: a non-empty string
193 _TNonEmptyString = _TAnd(_TString, _TTrue)
196 #: a maybe non-empty string
197 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
200 #: a maybe boolean (bool or none)
201 _TMaybeBool = _TOr(_TBool, _TNone)
204 #: a positive integer
205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
207 #: a strictly positive integer
208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
211 def _TListOf(my_type):
212 """Checks if a given value is a list with all elements of the same type.
216 lambda lst: compat.all(my_type(v) for v in lst))
219 def _TDictOf(key_type, val_type):
220 """Checks a dict type for the type of its key/values.
224 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225 and compat.all(val_type(v)
226 for v in my_dict.values())))
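# Illustrative sketch (not part of the original module): the mini type-system
# helpers above compose into richer checks, for example:
#
#   _TListOf(_TNonEmptyString)(["node1.example.com", "node2.example.com"])
#       # -> True
#   _TListOf(_TNonEmptyString)(["node1.example.com", ""])
#       # -> False (the empty string fails _TNonEmptyString)
#   _TDictOf(_TNonEmptyString, _TPositiveInt)({"count": 3})
#       # -> True
#   _TOr(_TNone, _TElemOf(["live", "non-live"]))("offline")
#       # -> False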
229 # Common opcode attributes
231 #: output fields for a query operation
232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
235 #: the shutdown timeout
236 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
239 #: the force parameter
240 _PForce = ("force", False, _TBool)
242 #: a required instance name (for single-instance LUs)
243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
246 #: a required node name (for single-node LUs)
247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
249 #: the migration type (live/non-live)
250 _PMigrationMode = ("mode", None, _TOr(_TNone,
251 _TElemOf(constants.HT_MIGRATION_MODES)))
253 #: the obsolete 'live' mode (boolean)
254 _PMigrationLive = ("live", None, _TMaybeBool)
258 class LogicalUnit(object):
259 """Logical Unit base class.
261 Subclasses must follow these rules:
262 - implement ExpandNames
263 - implement CheckPrereq (except when tasklets are used)
264 - implement Exec (except when tasklets are used)
265 - implement BuildHooksEnv
266 - redefine HPATH and HTYPE
267 - optionally redefine their run requirements:
268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
270 Note that all commands require root permissions.
272 @ivar dry_run_result: the value (if any) that will be returned to the caller
273 in dry-run mode (signalled by opcode dry_run parameter)
274 @cvar _OP_PARAMS: a list of opcode attributes, their default values
275 they should get if not already defined, and types they must match
283 def __init__(self, processor, op, context, rpc):
284 """Constructor for LogicalUnit.
286 This needs to be overridden in derived classes in order to check op
290 self.proc = processor
292 self.cfg = context.cfg
293 self.context = context
295 # Dicts used to declare locking needs to mcpu
296 self.needed_locks = None
297 self.acquired_locks = {}
298 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
300 self.remove_locks = {}
301 # Used to force good behavior when calling helper functions
302 self.recalculate_locks = {}
305 self.Log = processor.Log # pylint: disable-msg=C0103
306 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
307 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
308 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
309 # support for dry-run
310 self.dry_run_result = None
311 # support for generic debug attribute
312 if (not hasattr(self.op, "debug_level") or
313 not isinstance(self.op.debug_level, int)):
314 self.op.debug_level = 0
319 # The new kind-of-type-system
320 op_id = self.op.OP_ID
321 for attr_name, aval, test in self._OP_PARAMS:
322 if not hasattr(op, attr_name):
323 if aval == _NoDefault:
324 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
325 (op_id, attr_name), errors.ECODE_INVAL)
331 setattr(self.op, attr_name, dval)
332 attr_val = getattr(op, attr_name)
336 if not callable(test):
337 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
338 " given type is not a proper type (%s)" %
339 (op_id, attr_name, test))
340 if not test(attr_val):
341 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
342 self.op.OP_ID, attr_name, type(attr_val), attr_val)
343 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
344 (op_id, attr_name), errors.ECODE_INVAL)
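# Illustrative sketch (not from the original module): a concrete LU would
# declare its parameters as (name, default, check) tuples, e.g.:
#
#   _OP_PARAMS = [
#     _PInstanceName,                      # required, non-empty string
#     ("ignore_failures", False, _TBool),  # optional, defaults to False
#   ]
#
# The loop above then fills missing opcode attributes with their declared
# defaults and rejects values that fail the associated check function.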
346 self.CheckArguments()
349 """Returns the SshRunner object
353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
356 ssh = property(fget=__GetSSH)
358 def CheckArguments(self):
359 """Check syntactic validity for the opcode arguments.
361 This method is for doing a simple syntactic check and ensuring the
362 validity of opcode parameters, without any cluster-related
363 checks. While the same can be accomplished in ExpandNames and/or
364 CheckPrereq, doing these separately is better because:
366 - ExpandNames is left as purely a lock-related function
367 - CheckPrereq is run after we have acquired locks (and possible
370 The function is allowed to change the self.op attribute so that
371 later methods no longer need to worry about missing parameters.
376 def ExpandNames(self):
377 """Expand names for this LU.
379 This method is called before starting to execute the opcode, and it should
380 update all the parameters of the opcode to their canonical form (e.g. a
381 short node name must be fully expanded after this method has successfully
382 completed). This way locking, hooks, logging, etc. can work correctly.
384 LUs which implement this method must also populate the self.needed_locks
385 member, as a dict with lock levels as keys, and a list of needed lock names
388 - use an empty dict if you don't need any lock
389 - if you don't need any lock at a particular level omit that level
390 - don't put anything for the BGL level
391 - if you want all locks at a level use locking.ALL_SET as a value
393 If you need to share locks (rather than acquire them exclusively) at one
394 level you can modify self.share_locks, setting a true value (usually 1) for
395 that level. By default locks are not shared.
397 This function can also define a list of tasklets, which then will be
398 executed in order instead of the usual LU-level CheckPrereq and Exec
399 functions, if those are not defined by the LU.
403 # Acquire all nodes and one instance
404 self.needed_locks = {
405 locking.LEVEL_NODE: locking.ALL_SET,
406 locking.LEVEL_INSTANCE: ['instance1.example.com'],
408 # Acquire just two nodes
409 self.needed_locks = {
410 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
413 self.needed_locks = {} # No, you can't leave it to the default value None
416 # The implementation of this method is mandatory only if the new LU is
417 # concurrent, so that old LUs don't need to be changed all at the same
420 self.needed_locks = {} # Exclusive LUs don't need locks.
422 raise NotImplementedError
424 def DeclareLocks(self, level):
425 """Declare LU locking needs for a level
427 While most LUs can just declare their locking needs at ExpandNames time,
428 sometimes there's the need to calculate some locks after having acquired
429 the ones before. This function is called just before acquiring locks at a
430 particular level, but after acquiring the ones at lower levels, and permits
431 such calculations. It can be used to modify self.needed_locks, and by
432 default it does nothing.
434 This function is only called if you have something already set in
435 self.needed_locks for the level.
437 @param level: Locking level which is going to be locked
438 @type level: member of ganeti.locking.LEVELS
442 def CheckPrereq(self):
443 """Check prerequisites for this LU.
445 This method should check that the prerequisites for the execution
446 of this LU are fulfilled. It can do internode communication, but
447 it should be idempotent - no cluster or system changes are
450 The method should raise errors.OpPrereqError in case something is
451 not fulfilled. Its return value is ignored.
453 This method should also update all the parameters of the opcode to
454 their canonical form if it hasn't been done by ExpandNames before.
457 if self.tasklets is not None:
458 for (idx, tl) in enumerate(self.tasklets):
459 logging.debug("Checking prerequisites for tasklet %s/%s",
460 idx + 1, len(self.tasklets))
465 def Exec(self, feedback_fn):
468 This method should implement the actual work. It should raise
469 errors.OpExecError for failures that are somewhat dealt with in
473 if self.tasklets is not None:
474 for (idx, tl) in enumerate(self.tasklets):
475 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
478 raise NotImplementedError
480 def BuildHooksEnv(self):
481 """Build hooks environment for this LU.
483 This method should return a three-element tuple consisting of: a dict
484 containing the environment that will be used for running the
485 specific hook for this LU, a list of node names on which the hook
486 should run before the execution, and a list of node names on which
487 the hook should run after the execution.
489 The keys of the dict must not be prefixed with 'GANETI_', as this will
490 be handled in the hooks runner. Also note additional keys will be
491 added by the hooks runner. If the LU doesn't define any
492 environment, an empty dict (and not None) should be returned.
494 If there are no nodes to return, an empty list (and not None) should be used.
496 Note that if the HPATH for a LU class is None, this function will
500 raise NotImplementedError
502 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503 """Notify the LU about the results of its hooks.
505 This method is called every time a hooks phase is executed, and notifies
506 the Logical Unit about the hooks' result. The LU can then use it to alter
507 its result based on the hooks. By default the method does nothing and the
508 previous result is passed back unchanged but any LU can define it if it
509 wants to use the local cluster hook-scripts somehow.
511 @param phase: one of L{constants.HOOKS_PHASE_POST} or
512 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
513 @param hook_results: the results of the multi-node hooks rpc call
514 @param feedback_fn: function used to send feedback back to the caller
515 @param lu_result: the previous Exec result this LU had, or None
517 @return: the new Exec result, based on the previous result
521 # API must be kept, thus we ignore the unused-argument and the
522 # 'method could be a function' warnings
523 # pylint: disable-msg=W0613,R0201
526 def _ExpandAndLockInstance(self):
527 """Helper function to expand and lock an instance.
529 Many LUs that work on an instance take its name in self.op.instance_name
530 and need to expand it and then declare the expanded name for locking. This
531 function does it, and then updates self.op.instance_name to the expanded
532 name. It also initializes needed_locks as a dict, if this hasn't been done
536 if self.needed_locks is None:
537 self.needed_locks = {}
539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540 "_ExpandAndLockInstance called with instance-level locks set"
541 self.op.instance_name = _ExpandInstanceName(self.cfg,
542 self.op.instance_name)
543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
545 def _LockInstancesNodes(self, primary_only=False):
546 """Helper function to declare instances' nodes for locking.
548 This function should be called after locking one or more instances to lock
549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550 with all primary or secondary nodes for instances already locked and
551 present in self.needed_locks[locking.LEVEL_INSTANCE].
553 It should be called from DeclareLocks, and for safety only works if
554 self.recalculate_locks[locking.LEVEL_NODE] is set.
556 In the future it may grow parameters to just lock some instances' nodes, or
557 to just lock primaries or secondary nodes, if needed.
559 It should be called from DeclareLocks in a way similar to::
561 if level == locking.LEVEL_NODE:
562 self._LockInstancesNodes()
564 @type primary_only: boolean
565 @param primary_only: only lock primary nodes of locked instances
568 assert locking.LEVEL_NODE in self.recalculate_locks, \
569 "_LockInstancesNodes helper function called with no nodes to recalculate"
571 # TODO: check if we've really been called with the instance locks held
573 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
574 # future we might want to have different behaviors depending on the value
575 # of self.recalculate_locks[locking.LEVEL_NODE]
577 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578 instance = self.context.cfg.GetInstanceInfo(instance_name)
579 wanted_nodes.append(instance.primary_node)
581 wanted_nodes.extend(instance.secondary_nodes)
583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
588 del self.recalculate_locks[locking.LEVEL_NODE]
591 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592 """Simple LU which runs no hooks.
594 This LU is intended as a parent for other LogicalUnits which will
595 run no hooks, in order to reduce duplicate code.
601 def BuildHooksEnv(self):
602 """Empty BuildHooksEnv for NoHooksLu.
604 This just raises an error.
607 assert False, "BuildHooksEnv called for NoHooksLUs"
611 """Tasklet base class.
613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614 they can mix legacy code with tasklets. Locking needs to be done in the LU,
615 tasklets know nothing about locks.
617 Subclasses must follow these rules:
618 - Implement CheckPrereq
622 def __init__(self, lu):
629 def CheckPrereq(self):
630 """Check prerequisites for this tasklet.
632 This method should check whether the prerequisites for the execution of
633 this tasklet are fulfilled. It can do internode communication, but it
634 should be idempotent - no cluster or system changes are allowed.
636 The method should raise errors.OpPrereqError in case something is not
637 fulfilled. Its return value is ignored.
639 This method should also update all parameters to their canonical form if it
640 hasn't been done before.
645 def Exec(self, feedback_fn):
646 """Execute the tasklet.
648 This method should implement the actual work. It should raise
649 errors.OpExecError for failures that are somewhat dealt with in code, or
653 raise NotImplementedError
656 def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names.
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
662 @param nodes: list of node names or None for all nodes
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
669 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
670 " non-empty list of nodes whose names are to be expanded.")
672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673 return utils.NiceSort(wanted)
676 def _GetWantedInstances(lu, instances):
677 """Returns list of checked and expanded instance names.
679 @type lu: L{LogicalUnit}
680 @param lu: the logical unit on whose behalf we execute
681 @type instances: list
682 @param instances: list of instance names or None for all instances
684 @return: the list of instances, sorted
685 @raise errors.OpPrereqError: if the instances parameter is wrong type
686 @raise errors.OpPrereqError: if any of the passed instances is not found
690 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
692 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
696 def _GetUpdatedParams(old_params, update_dict,
697 use_default=True, use_none=False):
698 """Return the new version of a parameter dictionary.
700 @type old_params: dict
701 @param old_params: old parameters
702 @type update_dict: dict
703 @param update_dict: dict containing new parameter values, or
704 constants.VALUE_DEFAULT to reset the parameter to its default
706 @type use_default: boolean
707 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
708 values as 'to be deleted' values
709 @type use_none: boolean
710 @param use_none: whether to recognise C{None} values as 'to be
713 @return: the new parameter dictionary
716 params_copy = copy.deepcopy(old_params)
717 for key, val in update_dict.iteritems():
718 if ((use_default and val == constants.VALUE_DEFAULT) or
719 (use_none and val is None)):
725 params_copy[key] = val
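# Illustrative sketch (assumed values, not from the original code): with
# old_params = {"mem": 512, "vcpus": 2} and
# update_dict = {"mem": constants.VALUE_DEFAULT, "vcpus": 4},
# _GetUpdatedParams returns {"vcpus": 4} when use_default is True: the
# "mem" key is removed (i.e. reset to the cluster default) while "vcpus"
# is overridden. With use_none=True, an explicit None value is treated the
# same way as constants.VALUE_DEFAULT.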
729 def _CheckOutputFields(static, dynamic, selected):
730 """Checks whether all selected fields are valid.
732 @type static: L{utils.FieldSet}
733 @param static: static fields set
734 @type dynamic: L{utils.FieldSet}
735 @param dynamic: dynamic fields set
742 delta = f.NonMatching(selected)
744 raise errors.OpPrereqError("Unknown output fields selected: %s"
745 % ",".join(delta), errors.ECODE_INVAL)
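# Illustrative sketch (hypothetical field names): with
# static = utils.FieldSet("name", "pnode") and
# dynamic = utils.FieldSet("oper_state"), a selection of
# ["name", "bogus"] would raise OpPrereqError mentioning "bogus",
# while ["name", "oper_state"] passes the check unchanged.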
748 def _CheckGlobalHvParams(params):
749 """Validates that given hypervisor params are not global ones.
751 This will ensure that instances don't get customised versions of
755 used_globals = constants.HVC_GLOBALS.intersection(params)
757 msg = ("The following hypervisor parameters are global and cannot"
758 " be customized at instance level, please modify them at"
759 " cluster level: %s" % utils.CommaJoin(used_globals))
760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
763 def _CheckNodeOnline(lu, node):
764 """Ensure that a given node is online.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @raise errors.OpPrereqError: if the node is offline
771 if lu.cfg.GetNodeInfo(node).offline:
772 raise errors.OpPrereqError("Can't use offline node %s" % node,
776 def _CheckNodeNotDrained(lu, node):
777 """Ensure that a given node is not drained.
779 @param lu: the LU on behalf of which we make the check
780 @param node: the node to check
781 @raise errors.OpPrereqError: if the node is drained
784 if lu.cfg.GetNodeInfo(node).drained:
785 raise errors.OpPrereqError("Can't use drained node %s" % node,
789 def _CheckNodeHasOS(lu, node, os_name, force_variant):
790 """Ensure that a node supports a given OS.
792 @param lu: the LU on behalf of which we make the check
793 @param node: the node to check
794 @param os_name: the OS to query about
795 @param force_variant: whether to ignore variant errors
796 @raise errors.OpPrereqError: if the node does not support the OS
799 result = lu.rpc.call_os_get(node, os_name)
800 result.Raise("OS '%s' not in supported OS list for node %s" %
802 prereq=True, ecode=errors.ECODE_INVAL)
803 if not force_variant:
804 _CheckOSVariant(result.payload, os_name)
807 def _RequireFileStorage():
808 """Checks that file storage is enabled.
810 @raise errors.OpPrereqError: when file storage is disabled
813 if not constants.ENABLE_FILE_STORAGE:
814 raise errors.OpPrereqError("File storage disabled at configure time",
818 def _CheckDiskTemplate(template):
819 """Ensure a given disk template is valid.
822 if template not in constants.DISK_TEMPLATES:
823 msg = ("Invalid disk template name '%s', valid templates are: %s" %
824 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
826 if template == constants.DT_FILE:
827 _RequireFileStorage()
831 def _CheckStorageType(storage_type):
832 """Ensure a given storage type is valid.
835 if storage_type not in constants.VALID_STORAGE_TYPES:
836 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
838 if storage_type == constants.ST_FILE:
839 _RequireFileStorage()
843 def _GetClusterDomainSecret():
844 """Reads the cluster domain secret.
847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
851 def _CheckInstanceDown(lu, instance, reason):
852 """Ensure that an instance is not running."""
853 if instance.admin_up:
854 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
855 (instance.name, reason), errors.ECODE_STATE)
857 pnode = instance.primary_node
858 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
859 ins_l.Raise("Can't contact node %s for instance information" % pnode,
860 prereq=True, ecode=errors.ECODE_ENVIRON)
862 if instance.name in ins_l.payload:
863 raise errors.OpPrereqError("Instance %s is running, %s" %
864 (instance.name, reason), errors.ECODE_STATE)
867 def _ExpandItemName(fn, name, kind):
868 """Expand an item name.
870 @param fn: the function to use for expansion
871 @param name: requested item name
872 @param kind: text description ('Node' or 'Instance')
873 @return: the resolved (full) name
874 @raise errors.OpPrereqError: if the item is not found
878 if full_name is None:
879 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
884 def _ExpandNodeName(cfg, name):
885 """Wrapper over L{_ExpandItemName} for nodes."""
886 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
889 def _ExpandInstanceName(cfg, name):
890 """Wrapper over L{_ExpandItemName} for instances."""
891 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
894 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
895 memory, vcpus, nics, disk_template, disks,
896 bep, hvp, hypervisor_name):
897 """Builds instance-related env variables for hooks
899 This builds the hook environment from individual variables.
902 @param name: the name of the instance
903 @type primary_node: string
904 @param primary_node: the name of the instance's primary node
905 @type secondary_nodes: list
906 @param secondary_nodes: list of secondary nodes as strings
907 @type os_type: string
908 @param os_type: the name of the instance's OS
909 @type status: boolean
910 @param status: the should_run status of the instance
912 @param memory: the memory size of the instance
914 @param vcpus: the count of VCPUs the instance has
916 @param nics: list of tuples (ip, mac, mode, link) representing
917 the NICs the instance has
918 @type disk_template: string
919 @param disk_template: the disk template of the instance
921 @param disks: the list of (size, mode) pairs
923 @param bep: the backend parameters for the instance
925 @param hvp: the hypervisor parameters for the instance
926 @type hypervisor_name: string
927 @param hypervisor_name: the hypervisor for the instance
929 @return: the hook environment for this instance
938 "INSTANCE_NAME": name,
939 "INSTANCE_PRIMARY": primary_node,
940 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
941 "INSTANCE_OS_TYPE": os_type,
942 "INSTANCE_STATUS": str_status,
943 "INSTANCE_MEMORY": memory,
944 "INSTANCE_VCPUS": vcpus,
945 "INSTANCE_DISK_TEMPLATE": disk_template,
946 "INSTANCE_HYPERVISOR": hypervisor_name,
950 nic_count = len(nics)
951 for idx, (ip, mac, mode, link) in enumerate(nics):
954 env["INSTANCE_NIC%d_IP" % idx] = ip
955 env["INSTANCE_NIC%d_MAC" % idx] = mac
956 env["INSTANCE_NIC%d_MODE" % idx] = mode
957 env["INSTANCE_NIC%d_LINK" % idx] = link
958 if mode == constants.NIC_MODE_BRIDGED:
959 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
963 env["INSTANCE_NIC_COUNT"] = nic_count
966 disk_count = len(disks)
967 for idx, (size, mode) in enumerate(disks):
968 env["INSTANCE_DISK%d_SIZE" % idx] = size
969 env["INSTANCE_DISK%d_MODE" % idx] = mode
973 env["INSTANCE_DISK_COUNT"] = disk_count
975 for source, kind in [(bep, "BE"), (hvp, "HV")]:
976 for key, value in source.items():
977 env["INSTANCE_%s_%s" % (kind, key)] = value
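# Illustrative sketch (hypothetical instance, not from the original code):
# for an instance "web1" with one bridged NIC and one 10240 MB disk, the
# resulting environment would contain keys such as:
#
#   INSTANCE_NAME=web1
#   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged
#   INSTANCE_NIC0_BRIDGE=<the NIC's link>
#   INSTANCE_DISK_COUNT=1
#   INSTANCE_DISK0_SIZE=10240
#   INSTANCE_BE_<param>=<value> and INSTANCE_HV_<param>=<value>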
982 def _NICListToTuple(lu, nics):
983 """Build a list of nic information tuples.
985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986 value in LUQueryInstanceData.
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type nics: list of L{objects.NIC}
991 @param nics: list of nics to convert to hooks tuples
995 cluster = lu.cfg.GetClusterInfo()
999 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000 mode = filled_params[constants.NIC_MODE]
1001 link = filled_params[constants.NIC_LINK]
1002 hooks_nics.append((ip, mac, mode, link))
1006 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007 """Builds instance-related env variables for hooks from an object.
1009 @type lu: L{LogicalUnit}
1010 @param lu: the logical unit on whose behalf we execute
1011 @type instance: L{objects.Instance}
1012 @param instance: the instance for which we should build the
1014 @type override: dict
1015 @param override: dictionary with key/values that will override
1018 @return: the hook environment dictionary
1021 cluster = lu.cfg.GetClusterInfo()
1022 bep = cluster.FillBE(instance)
1023 hvp = cluster.FillHV(instance)
1025 'name': instance.name,
1026 'primary_node': instance.primary_node,
1027 'secondary_nodes': instance.secondary_nodes,
1028 'os_type': instance.os,
1029 'status': instance.admin_up,
1030 'memory': bep[constants.BE_MEMORY],
1031 'vcpus': bep[constants.BE_VCPUS],
1032 'nics': _NICListToTuple(lu, instance.nics),
1033 'disk_template': instance.disk_template,
1034 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1037 'hypervisor_name': instance.hypervisor,
1040 args.update(override)
1041 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1044 def _AdjustCandidatePool(lu, exceptions):
1045 """Adjust the candidate pool after node operations.
1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1050 lu.LogInfo("Promoted nodes to master candidate role: %s",
1051 utils.CommaJoin(node.name for node in mod_list))
1052 for name in mod_list:
1053 lu.context.ReaddNode(name)
1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1056 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1060 def _DecideSelfPromotion(lu, exceptions=None):
1061 """Decide whether I should promote myself as a master candidate.
1064 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066 # the new node will increase mc_max by one, so:
1067 mc_should = min(mc_should + 1, cp_size)
1068 return mc_now < mc_should
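# Worked example (assumed numbers): with candidate_pool_size = 10 and a
# three-node cluster where mc_now = 3 and mc_should = 3, adding this node
# bumps mc_should to min(3 + 1, 10) = 4; since 3 < 4, the new node decides
# to promote itself to master candidate.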
1071 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072 """Check that the bridges needed by a list of nics exist.
1075 cluster = lu.cfg.GetClusterInfo()
1076 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077 brlist = [params[constants.NIC_LINK] for params in paramslist
1078 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1080 result = lu.rpc.call_bridges_exist(target_node, brlist)
1081 result.Raise("Error checking bridges on destination node '%s'" %
1082 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1085 def _CheckInstanceBridgesExist(lu, instance, node=None):
1086 """Check that the bridges needed by an instance exist.
1090 node = instance.primary_node
1091 _CheckNicsBridgesExist(lu, instance.nics, node)
1094 def _CheckOSVariant(os_obj, name):
1095 """Check whether an OS name conforms to the OS variants specification.
1097 @type os_obj: L{objects.OS}
1098 @param os_obj: OS object to check
1100 @param name: OS name passed by the user, to check for validity
1103 if not os_obj.supported_variants:
1105 variant = objects.OS.GetVariant(name)
1107 raise errors.OpPrereqError("OS name must include a variant",
1110 if variant not in os_obj.supported_variants:
1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
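# Illustrative sketch (hypothetical OS names): for an OS that declares
# supported_variants, the user-supplied name must carry a variant suffix,
# e.g. "debootstrap+default"; a bare "debootstrap" raises "OS name must
# include a variant", and "debootstrap+unknown" raises "Unsupported OS
# variant" when "unknown" is not listed in supported_variants.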
1114 def _GetNodeInstancesInner(cfg, fn):
1115 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1118 def _GetNodeInstances(cfg, node_name):
1119 """Returns a list of all primary and secondary instances on a node.
1123 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1126 def _GetNodePrimaryInstances(cfg, node_name):
1127 """Returns primary instances on a node.
1130 return _GetNodeInstancesInner(cfg,
1131 lambda inst: node_name == inst.primary_node)
1134 def _GetNodeSecondaryInstances(cfg, node_name):
1135 """Returns secondary instances on a node.
1138 return _GetNodeInstancesInner(cfg,
1139 lambda inst: node_name in inst.secondary_nodes)
1142 def _GetStorageTypeArgs(cfg, storage_type):
1143 """Returns the arguments for a storage type.
1146 # Special case for file storage
1147 if storage_type == constants.ST_FILE:
1148 # storage.FileStorage wants a list of storage directories
1149 return [[cfg.GetFileStorageDir()]]
1154 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1157 for dev in instance.disks:
1158 cfg.SetDiskID(dev, node_name)
1160 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161 result.Raise("Failed to get disk status from node %s" % node_name,
1162 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164 for idx, bdev_status in enumerate(result.payload):
1165 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1171 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172 """Check the sanity of iallocator and node arguments and use the
1173 cluster-wide iallocator if appropriate.
1175 Check that at most one of (iallocator, node) is specified. If none is
1176 specified, then the LU's opcode's iallocator slot is filled with the
1177 cluster-wide default iallocator.
1179 @type iallocator_slot: string
1180 @param iallocator_slot: the name of the opcode iallocator slot
1181 @type node_slot: string
1182 @param node_slot: the name of the opcode target node slot
1185 node = getattr(lu.op, node_slot, None)
1186 iallocator = getattr(lu.op, iallocator_slot, None)
1188 if node is not None and iallocator is not None:
1189 raise errors.OpPrereqError("Do not specify both iallocator and node.",
1191 elif node is None and iallocator is None:
1192 default_iallocator = lu.cfg.GetDefaultIAllocator()
1193 if default_iallocator:
1194 setattr(lu.op, iallocator_slot, default_iallocator)
1196 raise errors.OpPrereqError("No iallocator or node given and no"
1197 " cluster-wide default iallocator found."
1198 " Please specify either an iallocator or a"
1199 " node, or set a cluster-wide default"
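# Illustrative sketch of the three outcomes (assumed opcode slot values):
#
#   iallocator="hail", node="node1"  -> OpPrereqError (both specified)
#   iallocator=None,   node=None     -> the opcode's iallocator slot is
#                                       filled with the cluster default, or
#                                       OpPrereqError if none is configured
#   exactly one of the two given     -> accepted unchanged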
1203 class LUPostInitCluster(LogicalUnit):
1204 """Logical unit for running hooks after cluster initialization.
1207 HPATH = "cluster-init"
1208 HTYPE = constants.HTYPE_CLUSTER
1210 def BuildHooksEnv(self):
1214 env = {"OP_TARGET": self.cfg.GetClusterName()}
1215 mn = self.cfg.GetMasterNode()
1216 return env, [], [mn]
1218 def Exec(self, feedback_fn):
1225 class LUDestroyCluster(LogicalUnit):
1226 """Logical unit for destroying the cluster.
1229 HPATH = "cluster-destroy"
1230 HTYPE = constants.HTYPE_CLUSTER
1232 def BuildHooksEnv(self):
1236 env = {"OP_TARGET": self.cfg.GetClusterName()}
1239 def CheckPrereq(self):
1240 """Check prerequisites.
1242 This checks whether the cluster is empty.
1244 Any errors are signaled by raising errors.OpPrereqError.
1247 master = self.cfg.GetMasterNode()
1249 nodelist = self.cfg.GetNodeList()
1250 if len(nodelist) != 1 or nodelist[0] != master:
1251 raise errors.OpPrereqError("There are still %d node(s) in"
1252 " this cluster." % (len(nodelist) - 1),
1254 instancelist = self.cfg.GetInstanceList()
1256 raise errors.OpPrereqError("There are still %d instance(s) in"
1257 " this cluster." % len(instancelist),
1260 def Exec(self, feedback_fn):
1261 """Destroys the cluster.
1264 master = self.cfg.GetMasterNode()
1265 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1267 # Run post hooks on master node before it's removed
1268 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1270 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1272 # pylint: disable-msg=W0702
1273 self.LogWarning("Errors occurred running hooks on %s" % master)
1275 result = self.rpc.call_node_stop_master(master, False)
1276 result.Raise("Could not disable the master role")
1278 if modify_ssh_setup:
1279 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1280 utils.CreateBackup(priv_key)
1281 utils.CreateBackup(pub_key)
1286 def _VerifyCertificate(filename):
1287 """Verifies a certificate for LUVerifyCluster.
1289 @type filename: string
1290 @param filename: Path to PEM file
1294 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1295 utils.ReadFile(filename))
1296 except Exception, err: # pylint: disable-msg=W0703
1297 return (LUVerifyCluster.ETYPE_ERROR,
1298 "Failed to load X509 certificate %s: %s" % (filename, err))
1301 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1302 constants.SSL_CERT_EXPIRATION_ERROR)
1305 fnamemsg = "While verifying %s: %s" % (filename, msg)
1310 return (None, fnamemsg)
1311 elif errcode == utils.CERT_WARNING:
1312 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1313 elif errcode == utils.CERT_ERROR:
1314 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1316 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1319 class LUVerifyCluster(LogicalUnit):
1320 """Verifies the cluster status.
1323 HPATH = "cluster-verify"
1324 HTYPE = constants.HTYPE_CLUSTER
1326 ("skip_checks", _EmptyList,
1327 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1328 ("verbose", False, _TBool),
1329 ("error_codes", False, _TBool),
1330 ("debug_simulate_errors", False, _TBool),
1334 TCLUSTER = "cluster"
1336 TINSTANCE = "instance"
1338 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1339 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1340 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1341 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1342 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1343 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1345 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1346 ENODEDRBD = (TNODE, "ENODEDRBD")
1347 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1348 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1349 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1350 ENODEHV = (TNODE, "ENODEHV")
1351 ENODELVM = (TNODE, "ENODELVM")
1352 ENODEN1 = (TNODE, "ENODEN1")
1353 ENODENET = (TNODE, "ENODENET")
1354 ENODEOS = (TNODE, "ENODEOS")
1355 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1356 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1357 ENODERPC = (TNODE, "ENODERPC")
1358 ENODESSH = (TNODE, "ENODESSH")
1359 ENODEVERSION = (TNODE, "ENODEVERSION")
1360 ENODESETUP = (TNODE, "ENODESETUP")
1361 ENODETIME = (TNODE, "ENODETIME")
1363 ETYPE_FIELD = "code"
1364 ETYPE_ERROR = "ERROR"
1365 ETYPE_WARNING = "WARNING"
1367 class NodeImage(object):
1368 """A class representing the logical and physical status of a node.
1371 @ivar name: the node name to which this object refers
1372 @ivar volumes: a structure as returned from
1373 L{ganeti.backend.GetVolumeList} (runtime)
1374 @ivar instances: a list of running instances (runtime)
1375 @ivar pinst: list of configured primary instances (config)
1376 @ivar sinst: list of configured secondary instances (config)
1377 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1378 of this node (config)
1379 @ivar mfree: free memory, as reported by hypervisor (runtime)
1380 @ivar dfree: free disk, as reported by the node (runtime)
1381 @ivar offline: the offline status (config)
1382 @type rpc_fail: boolean
1383 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1384 not whether the individual keys were correct) (runtime)
1385 @type lvm_fail: boolean
1386 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1387 @type hyp_fail: boolean
1388 @ivar hyp_fail: whether the RPC call didn't return the instance list
1389 @type ghost: boolean
1390 @ivar ghost: whether this is a known node or not (config)
1391 @type os_fail: boolean
1392 @ivar os_fail: whether the RPC call didn't return valid OS data
1394 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1397 def __init__(self, offline=False, name=None):
1406 self.offline = offline
1407 self.rpc_fail = False
1408 self.lvm_fail = False
1409 self.hyp_fail = False
1411 self.os_fail = False
1414 def ExpandNames(self):
1415 self.needed_locks = {
1416 locking.LEVEL_NODE: locking.ALL_SET,
1417 locking.LEVEL_INSTANCE: locking.ALL_SET,
1419 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1421 def _Error(self, ecode, item, msg, *args, **kwargs):
1422 """Format an error message.
1424 Based on the opcode's error_codes parameter, either format a
1425 parseable error code, or a simpler error string.
1427 This must be called only from Exec and functions called from Exec.
1430 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1432 # first complete the msg
1435 # then format the whole message
1436 if self.op.error_codes:
1437 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1443 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1444 # and finally report it via the feedback_fn
1445 self._feedback_fn(" - %s" % msg)
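# Illustrative sketch (hypothetical values): with error_codes enabled, the
# reported line looks like
#   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
# while without error codes the same condition is reported as
#   - ERROR: node node1.example.com: unable to check volume groups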
1447 def _ErrorIf(self, cond, *args, **kwargs):
1448 """Log an error message if the passed condition is True.
1451 cond = bool(cond) or self.op.debug_simulate_errors
1453 self._Error(*args, **kwargs)
1454 # WARN-level results do not mark the operation as failed
1455 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1456 self.bad = self.bad or cond
1458 def _VerifyNode(self, ninfo, nresult):
1459 """Perform some basic validation on data returned from a node.
1461 - check the result data structure is well formed and has all the
1463 - check ganeti version
1465 @type ninfo: L{objects.Node}
1466 @param ninfo: the node to check
1467 @param nresult: the results from the node
1469 @return: whether overall this call was successful (and we can expect
1470 reasonable values in the response)
1474 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1476 # main result, nresult should be a non-empty dict
1477 test = not nresult or not isinstance(nresult, dict)
1478 _ErrorIf(test, self.ENODERPC, node,
1479 "unable to verify node: no data returned")
1483 # compares ganeti version
1484 local_version = constants.PROTOCOL_VERSION
1485 remote_version = nresult.get("version", None)
1486 test = not (remote_version and
1487 isinstance(remote_version, (list, tuple)) and
1488 len(remote_version) == 2)
1489 _ErrorIf(test, self.ENODERPC, node,
1490 "connection to node returned invalid data")
1494 test = local_version != remote_version[0]
1495 _ErrorIf(test, self.ENODEVERSION, node,
1496 "incompatible protocol versions: master %s,"
1497 " node %s", local_version, remote_version[0])
1501 # node seems compatible, we can actually try to look into its results
1503 # full package version
1504 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1505 self.ENODEVERSION, node,
1506 "software version mismatch: master %s, node %s",
1507 constants.RELEASE_VERSION, remote_version[1],
1508 code=self.ETYPE_WARNING)
1510 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1511 if isinstance(hyp_result, dict):
1512 for hv_name, hv_result in hyp_result.iteritems():
1513 test = hv_result is not None
1514 _ErrorIf(test, self.ENODEHV, node,
1515 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1518 test = nresult.get(constants.NV_NODESETUP,
1519 ["Missing NODESETUP results"])
1520 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1525 def _VerifyNodeTime(self, ninfo, nresult,
1526 nvinfo_starttime, nvinfo_endtime):
1527 """Check the node time.
1529 @type ninfo: L{objects.Node}
1530 @param ninfo: the node to check
1531 @param nresult: the remote results for the node
1532 @param nvinfo_starttime: the start time of the RPC call
1533 @param nvinfo_endtime: the end time of the RPC call
1537 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1539 ntime = nresult.get(constants.NV_TIME, None)
1541 ntime_merged = utils.MergeTime(ntime)
1542 except (ValueError, TypeError):
1543 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1546 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1547 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1548 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1549 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1553 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1554 "Node time diverges by at least %s from master node time",
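# Worked example (assumed skew limit): with constants.NODE_MAX_CLOCK_SKEW
# at, say, 150 seconds, a node whose reported time is 10 minutes behind the
# start of the RPC call triggers ENODETIME with a divergence of roughly
# 600s, while a node within the allowed window around the call is accepted.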
1557 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1558 """Check the node LVM data.
1560 @type ninfo: L{objects.Node}
1561 @param ninfo: the node to check
1562 @param nresult: the remote results for the node
1563 @param vg_name: the configured VG name
1570 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1572 # checks vg existence and size > 20G
1573 vglist = nresult.get(constants.NV_VGLIST, None)
1575 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1577 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1578 constants.MIN_VG_SIZE)
1579 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1582 pvlist = nresult.get(constants.NV_PVLIST, None)
1583 test = pvlist is None
1584 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1586 # check that ':' is not present in PV names, since it's a
1587 # special character for lvcreate (denotes the range of PEs to
1589 for _, pvname, owner_vg in pvlist:
1590 test = ":" in pvname
1591 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1592 " '%s' of VG '%s'", pvname, owner_vg)
1594 def _VerifyNodeNetwork(self, ninfo, nresult):
1595 """Check the node network connectivity.
1597 @type ninfo: L{objects.Node}
1598 @param ninfo: the node to check
1599 @param nresult: the remote results for the node
1603 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1605 test = constants.NV_NODELIST not in nresult
1606 _ErrorIf(test, self.ENODESSH, node,
1607 "node hasn't returned node ssh connectivity data")
1609 if nresult[constants.NV_NODELIST]:
1610 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1611 _ErrorIf(True, self.ENODESSH, node,
1612 "ssh communication with node '%s': %s", a_node, a_msg)
1614 test = constants.NV_NODENETTEST not in nresult
1615 _ErrorIf(test, self.ENODENET, node,
1616 "node hasn't returned node tcp connectivity data")
1618 if nresult[constants.NV_NODENETTEST]:
1619 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1621 _ErrorIf(True, self.ENODENET, node,
1622 "tcp communication with node '%s': %s",
1623 anode, nresult[constants.NV_NODENETTEST][anode])
1625 test = constants.NV_MASTERIP not in nresult
1626 _ErrorIf(test, self.ENODENET, node,
1627 "node hasn't returned node master IP reachability data")
1629 if not nresult[constants.NV_MASTERIP]:
1630 if node == self.master_node:
1631 msg = "the master node cannot reach the master IP (not configured?)"
1633 msg = "cannot reach the master IP"
1634 _ErrorIf(True, self.ENODENET, node, msg)
1637 def _VerifyInstance(self, instance, instanceconfig, node_image):
1638 """Verify an instance.
1640 This function checks to see if the required block devices are
1641 available on the instance's node.
1644 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1645 node_current = instanceconfig.primary_node
1647 node_vol_should = {}
1648 instanceconfig.MapLVsByNode(node_vol_should)
1650 for node in node_vol_should:
1651 n_img = node_image[node]
1652 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1653 # ignore missing volumes on offline or broken nodes
1655 for volume in node_vol_should[node]:
1656 test = volume not in n_img.volumes
1657 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1658 "volume %s missing on node %s", volume, node)
1660 if instanceconfig.admin_up:
1661 pri_img = node_image[node_current]
1662 test = instance not in pri_img.instances and not pri_img.offline
1663 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1664 "instance not running on its primary node %s",
1667 for node, n_img in node_image.items():
1668 if node != node_current:
1669 test = instance in n_img.instances
1670 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1671 "instance should not run on node %s", node)
1673 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1674 """Verify if there are any unknown volumes in the cluster.
1676 The .os, .swap and backup volumes are ignored. All other volumes are
1677 reported as unknown.
1679 @type reserved: L{ganeti.utils.FieldSet}
1680 @param reserved: a FieldSet of reserved volume names
1683 for node, n_img in node_image.items():
1684 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1685 # skip non-healthy nodes
1687 for volume in n_img.volumes:
1688 test = ((node not in node_vol_should or
1689 volume not in node_vol_should[node]) and
1690 not reserved.Matches(volume))
1691 self._ErrorIf(test, self.ENODEORPHANLV, node,
1692 "volume %s is unknown", volume)
1694 def _VerifyOrphanInstances(self, instancelist, node_image):
1695 """Verify the list of running instances.
1697 This checks what instances are running but unknown to the cluster.
1700 for node, n_img in node_image.items():
1701 for o_inst in n_img.instances:
1702 test = o_inst not in instancelist
1703 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1704 "instance %s on node %s should not exist", o_inst, node)
1706 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1707 """Verify N+1 Memory Resilience.
1709 Check that if one single node dies we can still start all the
1710 instances it was primary for.
1713 for node, n_img in node_image.items():
1714 # This code checks that every node which is now listed as a
1715 # secondary has enough memory to host all instances it is
1716 # supposed to take over, should a single other node in the cluster fail.
1717 # FIXME: not ready for failover to an arbitrary node
1718 # FIXME: does not support file-backed instances
1719 # WARNING: we currently take into account down instances as well
1720 # as up ones, considering that even if they're down someone
1721 # might want to start them even in the event of a node failure.
1722 for prinode, instances in n_img.sbp.items():
1724 for instance in instances:
1725 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1726 if bep[constants.BE_AUTO_BALANCE]:
1727 needed_mem += bep[constants.BE_MEMORY]
1728 test = n_img.mfree < needed_mem
1729 self._ErrorIf(test, self.ENODEN1, node,
1730 "not enough memory to accommodate"
1731 " failovers should peer node %s fail", prinode)
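# Worked example (assumed sizes): if this node is secondary for two
# auto-balanced instances whose primary is "nodeA", needing 512 and 1024 MB
# of memory, then needed_mem = 1536 MB; ENODEN1 is raised if the node's
# reported free memory (mfree) is below that.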
1733 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1735 """Verifies and computes the node required file checksums.
1737 @type ninfo: L{objects.Node}
1738 @param ninfo: the node to check
1739 @param nresult: the remote results for the node
1740 @param file_list: required list of files
1741 @param local_cksum: dictionary of local files and their checksums
1742 @param master_files: list of files that only masters should have
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1749 test = not isinstance(remote_cksum, dict)
1750 _ErrorIf(test, self.ENODEFILECHECK, node,
1751 "node hasn't returned file checksum data")
1755 for file_name in file_list:
1756 node_is_mc = ninfo.master_candidate
1757 must_have = (file_name not in master_files) or node_is_mc
1759 test1 = file_name not in remote_cksum
1761 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1763 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1764 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1765 "file '%s' missing", file_name)
1766 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1767 "file '%s' has wrong checksum", file_name)
1768 # not candidate and this is not a must-have file
1769 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1770 "file '%s' should not exist on non master"
1771 " candidates (and the file is outdated)", file_name)
1772 # all good, except non-master/non-must have combination
1773 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1774 "file '%s' should not exist"
1775 " on non master candidates", file_name)
1777 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1779 """Verifies the node DRBD status.
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the remote results for the node
1784 @param instanceinfo: the dict of instances
1785 @param drbd_helper: the configured DRBD usermode helper
1786 @param drbd_map: the DRBD map as returned by
1787 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1791 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1794 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1795 test = (helper_result is None)
1796 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1797 "no drbd usermode helper returned")
1799 status, payload = helper_result
1801 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1802 "drbd usermode helper check unsuccessful: %s", payload)
1803 test = status and (payload != drbd_helper)
1804 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1805 "wrong drbd usermode helper: %s", payload)
1807 # compute the DRBD minors
1809 for minor, instance in drbd_map[node].items():
1810 test = instance not in instanceinfo
1811 _ErrorIf(test, self.ECLUSTERCFG, None,
1812 "ghost instance '%s' in temporary DRBD map", instance)
1813 # ghost instance should not be running, but otherwise we
1814 # don't give double warnings (both ghost instance and
1815 # unallocated minor in use)
1817 node_drbd[minor] = (instance, False)
1819 instance = instanceinfo[instance]
1820 node_drbd[minor] = (instance.name, instance.admin_up)
1822 # and now check them
1823 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1824 test = not isinstance(used_minors, (tuple, list))
1825 _ErrorIf(test, self.ENODEDRBD, node,
1826 "cannot parse drbd status file: %s", str(used_minors))
1828 # we cannot check drbd status
1831 for minor, (iname, must_exist) in node_drbd.items():
1832 test = minor not in used_minors and must_exist
1833 _ErrorIf(test, self.ENODEDRBD, node,
1834 "drbd minor %d of instance %s is not active", minor, iname)
1835 for minor in used_minors:
1836 test = minor not in node_drbd
1837 _ErrorIf(test, self.ENODEDRBD, node,
1838 "unallocated drbd minor %d is in use", minor)
1840 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1841 """Builds the node OS structures.
1843 @type ninfo: L{objects.Node}
1844 @param ninfo: the node to check
1845 @param nresult: the remote results for the node
1846 @param nimg: the node image object
1850 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1852 remote_os = nresult.get(constants.NV_OSLIST, None)
1853 test = (not isinstance(remote_os, list) or
1854 not compat.all(isinstance(v, list) and len(v) == 7
1855 for v in remote_os))
1857 _ErrorIf(test, self.ENODEOS, node,
1858 "node hasn't returned valid OS data")
1867 for (name, os_path, status, diagnose,
1868 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1870 if name not in os_dict:
1873 # parameters is a list of lists instead of list of tuples due to
1874 # JSON lacking a real tuple type, fix it:
1875 parameters = [tuple(v) for v in parameters]
1876 os_dict[name].append((os_path, status, diagnose,
1877 set(variants), set(parameters), set(api_ver)))
1879 nimg.oslist = os_dict
1881 def _VerifyNodeOS(self, ninfo, nimg, base):
1882 """Verifies the node OS list.
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nimg: the node image object
1887 @param base: the 'template' node we match against (e.g. from the master)
1891 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1893 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1895 for os_name, os_data in nimg.oslist.items():
1896 assert os_data, "Empty OS status for OS %s?!" % os_name
1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898 _ErrorIf(not f_status, self.ENODEOS, node,
1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901 "OS '%s' has multiple entries (first one shadows the rest): %s",
1902 os_name, utils.CommaJoin([v[0] for v in os_data]))
1903 # this will be caught in the backend too
1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905 and not f_var, self.ENODEOS, node,
1906 "OS %s with API at least %d does not declare any variant",
1907 os_name, constants.OS_API_V15)
1908 # comparisons with the 'base' image
1909 test = os_name not in base.oslist
1910 _ErrorIf(test, self.ENODEOS, node,
1911 "Extra OS %s not present on reference node (%s)",
1915 assert base.oslist[os_name], "Base node has empty OS status?"
1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1918 # base OS is invalid, skipping
1920 for kind, a, b in [("API version", f_api, b_api),
1921 ("variants list", f_var, b_var),
1922 ("parameters", f_param, b_param)]:
1923 _ErrorIf(a != b, self.ENODEOS, node,
1924 "OS %s %s differs from reference node %s: %s vs. %s",
1925 kind, os_name, base.name,
1926 utils.CommaJoin(a), utils.CommaJoin(b))
1928 # check any missing OSes
1929 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1930 _ErrorIf(missing, self.ENODEOS, node,
1931 "OSes present on reference node %s but missing on this node: %s",
1932 base.name, utils.CommaJoin(missing))
1934 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1935 """Verifies and updates the node volume data.
1937 This function will update a L{NodeImage}'s internal structures
1938 with data from the remote call.
1940 @type ninfo: L{objects.Node}
1941 @param ninfo: the node to check
1942 @param nresult: the remote results for the node
1943 @param nimg: the node image object
1944 @param vg_name: the configured VG name
1948 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1950 nimg.lvm_fail = True
1951 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1954 elif isinstance(lvdata, basestring):
1955 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1956 utils.SafeEncode(lvdata))
1957 elif not isinstance(lvdata, dict):
1958 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1960 nimg.volumes = lvdata
1961 nimg.lvm_fail = False
1963 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1964 """Verifies and updates the node instance list.
1966 If the listing was successful, then updates this node's instance
1967 list. Otherwise, it marks the RPC call as failed for the instance
1970 @type ninfo: L{objects.Node}
1971 @param ninfo: the node to check
1972 @param nresult: the remote results for the node
1973 @param nimg: the node image object
1976 idata = nresult.get(constants.NV_INSTANCELIST, None)
1977 test = not isinstance(idata, list)
1978 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1979 " (instancelist): %s", utils.SafeEncode(str(idata)))
1981 nimg.hyp_fail = True
1983 nimg.instances = idata
1985 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1986 """Verifies and computes a node information map
1988 @type ninfo: L{objects.Node}
1989 @param ninfo: the node to check
1990 @param nresult: the remote results for the node
1991 @param nimg: the node image object
1992 @param vg_name: the configured VG name
1996 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1998 # try to read free memory (from the hypervisor)
1999 hv_info = nresult.get(constants.NV_HVINFO, None)
2000 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2001 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2004 nimg.mfree = int(hv_info["memory_free"])
2005 except (ValueError, TypeError):
2006 _ErrorIf(True, self.ENODERPC, node,
2007 "node returned invalid nodeinfo, check hypervisor")
2009 # FIXME: devise a free space model for file based instances as well
2010 if vg_name is not None:
2011 test = (constants.NV_VGLIST not in nresult or
2012 vg_name not in nresult[constants.NV_VGLIST])
2013 _ErrorIf(test, self.ENODELVM, node,
2014 "node didn't return data for the volume group '%s'"
2015 " - it is either missing or broken", vg_name)
2018 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2019 except (ValueError, TypeError):
2020 _ErrorIf(True, self.ENODERPC, node,
2021 "node returned invalid LVM info, check LVM status")
2023 def BuildHooksEnv(self):
2026     Cluster-Verify hooks are run only in the post phase; if they fail, their
2027     output is logged in the verify output and the verification fails.
2030 all_nodes = self.cfg.GetNodeList()
2032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2034 for node in self.cfg.GetAllNodesInfo().values():
2035 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2037 return env, [], all_nodes
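# Note: the empty middle element means no node runs these hooks in the pre
# phase; every node runs them in the post phase, which matches the
# POST-only handling in HooksCallBack below.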
2039 def Exec(self, feedback_fn):
2040 """Verify integrity of cluster, performing various test on nodes.
2044 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2045 verbose = self.op.verbose
2046 self._feedback_fn = feedback_fn
2047 feedback_fn("* Verifying global settings")
2048 for msg in self.cfg.VerifyConfig():
2049 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2051 # Check the cluster certificates
2052 for cert_filename in constants.ALL_CERT_FILES:
2053 (errcode, msg) = _VerifyCertificate(cert_filename)
2054 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2056 vg_name = self.cfg.GetVGName()
2057 drbd_helper = self.cfg.GetDRBDHelper()
2058 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2059 cluster = self.cfg.GetClusterInfo()
2060 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2061 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2062 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2063 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2064 for iname in instancelist)
2065 i_non_redundant = [] # Non redundant instances
2066 i_non_a_balanced = [] # Non auto-balanced instances
2067 n_offline = 0 # Count of offline nodes
2068 n_drained = 0 # Count of nodes being drained
2069 node_vol_should = {}
2071 # FIXME: verify OS list
2072 # do local checksums
2073 master_files = [constants.CLUSTER_CONF_FILE]
2074 master_node = self.master_node = self.cfg.GetMasterNode()
2075 master_ip = self.cfg.GetMasterIP()
2077 file_names = ssconf.SimpleStore().GetFileList()
2078 file_names.extend(constants.ALL_CERT_FILES)
2079 file_names.extend(master_files)
2080 if cluster.modify_etc_hosts:
2081 file_names.append(constants.ETC_HOSTS)
2083 local_checksums = utils.FingerprintFiles(file_names)
2085 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2086 node_verify_param = {
2087 constants.NV_FILELIST: file_names,
2088 constants.NV_NODELIST: [node.name for node in nodeinfo
2089 if not node.offline],
2090 constants.NV_HYPERVISOR: hypervisors,
2091 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2092 node.secondary_ip) for node in nodeinfo
2093 if not node.offline],
2094 constants.NV_INSTANCELIST: hypervisors,
2095 constants.NV_VERSION: None,
2096 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2097 constants.NV_NODESETUP: None,
2098 constants.NV_TIME: None,
2099 constants.NV_MASTERIP: (master_node, master_ip),
2100 constants.NV_OSLIST: None,
2103 if vg_name is not None:
2104 node_verify_param[constants.NV_VGLIST] = None
2105 node_verify_param[constants.NV_LVLIST] = vg_name
2106 node_verify_param[constants.NV_PVLIST] = [vg_name]
2107 node_verify_param[constants.NV_DRBDLIST] = None
2110 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2112 # Build our expected cluster state
2113 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2115 for node in nodeinfo)
2117 for instance in instancelist:
2118 inst_config = instanceinfo[instance]
2120 for nname in inst_config.all_nodes:
2121 if nname not in node_image:
2123 gnode = self.NodeImage(name=nname)
2125 node_image[nname] = gnode
2127 inst_config.MapLVsByNode(node_vol_should)
2129 pnode = inst_config.primary_node
2130 node_image[pnode].pinst.append(instance)
2132 for snode in inst_config.secondary_nodes:
2133 nimg = node_image[snode]
2134 nimg.sinst.append(instance)
2135 if pnode not in nimg.sbp:
2136 nimg.sbp[pnode] = []
2137 nimg.sbp[pnode].append(instance)
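# Hedged sketch of the structure being built: for a single DRBD instance
# "inst1" with primary "node1" and secondary "node2",
#   node_image["node1"].pinst would contain "inst1"
#   node_image["node2"].sinst would contain "inst1"
#   node_image["node2"].sbp   would be {"node1": ["inst1"]}
# i.e. sbp groups the secondary instances by their primary node.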
2139 # At this point, we have the in-memory data structures complete,
2140 # except for the runtime information, which we'll gather next
2142 # Due to the way our RPC system works, exact response times cannot be
2143 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2144 # time before and after executing the request, we can at least have a time
2146 nvinfo_starttime = time.time()
2147 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2148 self.cfg.GetClusterName())
2149 nvinfo_endtime = time.time()
2151 all_drbd_map = self.cfg.ComputeDRBDMap()
2153 feedback_fn("* Verifying node status")
2157 for node_i in nodeinfo:
2159 nimg = node_image[node]
2163 feedback_fn("* Skipping offline node %s" % (node,))
2167 if node == master_node:
2169 elif node_i.master_candidate:
2170 ntype = "master candidate"
2171 elif node_i.drained:
2177 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2179 msg = all_nvinfo[node].fail_msg
2180 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2182 nimg.rpc_fail = True
2185 nresult = all_nvinfo[node].payload
2187 nimg.call_ok = self._VerifyNode(node_i, nresult)
2188 self._VerifyNodeNetwork(node_i, nresult)
2189 self._VerifyNodeLVM(node_i, nresult, vg_name)
2190 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2192 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2196 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2197 self._UpdateNodeInstances(node_i, nresult, nimg)
2198 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2199 self._UpdateNodeOS(node_i, nresult, nimg)
2200 if not nimg.os_fail:
2201 if refos_img is None:
2203 self._VerifyNodeOS(node_i, nimg, refos_img)
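# Descriptive note: the first node whose OS data came back usable is
# (apparently) kept as refos_img, so every later node's OS list gets
# compared against that reference image.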
2205 feedback_fn("* Verifying instance status")
2206 for instance in instancelist:
2208 feedback_fn("* Verifying instance %s" % instance)
2209 inst_config = instanceinfo[instance]
2210 self._VerifyInstance(instance, inst_config, node_image)
2211 inst_nodes_offline = []
2213 pnode = inst_config.primary_node
2214 pnode_img = node_image[pnode]
2215 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2216 self.ENODERPC, pnode, "instance %s, connection to"
2217 " primary node failed", instance)
2219 if pnode_img.offline:
2220 inst_nodes_offline.append(pnode)
2222 # If the instance is non-redundant we cannot survive losing its primary
2223 # node, so we are not N+1 compliant. On the other hand we have no disk
2224 # templates with more than one secondary so that situation is not well
2226 # FIXME: does not support file-backed instances
2227 if not inst_config.secondary_nodes:
2228 i_non_redundant.append(instance)
2229 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2230 instance, "instance has multiple secondary nodes: %s",
2231 utils.CommaJoin(inst_config.secondary_nodes),
2232 code=self.ETYPE_WARNING)
2234 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2235 i_non_a_balanced.append(instance)
2237 for snode in inst_config.secondary_nodes:
2238 s_img = node_image[snode]
2239 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2240 "instance %s, connection to secondary node failed", instance)
2243 inst_nodes_offline.append(snode)
2245 # warn that the instance lives on offline nodes
2246 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2247 "instance lives on offline node(s) %s",
2248 utils.CommaJoin(inst_nodes_offline))
2249 # ... or ghost nodes
2250 for node in inst_config.all_nodes:
2251 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2252 "instance lives on ghost node %s", node)
2254 feedback_fn("* Verifying orphan volumes")
2255 reserved = utils.FieldSet(*cluster.reserved_lvs)
2256 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2258 feedback_fn("* Verifying orphan instances")
2259 self._VerifyOrphanInstances(instancelist, node_image)
2261 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2262 feedback_fn("* Verifying N+1 Memory redundancy")
2263 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2265 feedback_fn("* Other Notes")
2267 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2268 % len(i_non_redundant))
2270 if i_non_a_balanced:
2271 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2272 % len(i_non_a_balanced))
2275 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2278 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2282 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2283 """Analyze the post-hooks' result
2285 This method analyses the hook result, handles it, and sends some
2286 nicely-formatted feedback back to the user.
2288 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2289 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2290 @param hooks_results: the results of the multi-node hooks rpc call
2291     @param feedback_fn: function used to send feedback back to the caller
2292 @param lu_result: previous Exec result
2293 @return: the new Exec result, based on the previous result
2297 # We only really run POST phase hooks, and are only interested in
2299 if phase == constants.HOOKS_PHASE_POST:
2300 # Used to change hooks' output to proper indentation
2301 indent_re = re.compile('^', re.M)
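# With re.M the '^' anchor matches the start of every line, so the
# indent_re.sub(...) call further down prefixes each line of a hook
# script's output before it is passed to feedback_fn.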
2302 feedback_fn("* Hooks Results")
2303 assert hooks_results, "invalid result from hooks"
2305 for node_name in hooks_results:
2306 res = hooks_results[node_name]
2308 test = msg and not res.offline
2309 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2310 "Communication failure in hooks execution: %s", msg)
2311 if res.offline or msg:
2312 # No need to investigate payload if node is offline or gave an error.
2313           # manually override lu_result here, as _ErrorIf only
2314 # overrides self.bad
2317 for script, hkr, output in res.payload:
2318 test = hkr == constants.HKR_FAIL
2319 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2320 "Script %s failed, output:", script)
2322 output = indent_re.sub(' ', output)
2323 feedback_fn("%s" % output)
2329 class LUVerifyDisks(NoHooksLU):
2330 """Verifies the cluster disks status.
2335 def ExpandNames(self):
2336 self.needed_locks = {
2337 locking.LEVEL_NODE: locking.ALL_SET,
2338 locking.LEVEL_INSTANCE: locking.ALL_SET,
2340 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2342 def Exec(self, feedback_fn):
2343 """Verify integrity of cluster disks.
2345 @rtype: tuple of three items
2346 @return: a tuple of (dict of node-to-node_error, list of instances
2347 which need activate-disks, dict of instance: (node, volume) for
2351 result = res_nodes, res_instances, res_missing = {}, [], {}
2353 vg_name = self.cfg.GetVGName()
2354 nodes = utils.NiceSort(self.cfg.GetNodeList())
2355 instances = [self.cfg.GetInstanceInfo(name)
2356 for name in self.cfg.GetInstanceList()]
2359 for inst in instances:
2361 if (not inst.admin_up or
2362 inst.disk_template not in constants.DTS_NET_MIRROR):
2364 inst.MapLVsByNode(inst_lvs)
2365 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2366 for node, vol_list in inst_lvs.iteritems():
2367 for vol in vol_list:
2368 nv_dict[(node, vol)] = inst
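# Hedged example of the mapping built here: for an instance whose LVs live
# on "node1", nv_dict might look roughly like
#   {("node1", "xenvg/disk0_data"): <Instance ...>}
# entries are popped again below as each LV is confirmed online.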
2373 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2377 node_res = node_lvs[node]
2378 if node_res.offline:
2380 msg = node_res.fail_msg
2382 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2383 res_nodes[node] = msg
2386 lvs = node_res.payload
2387 for lv_name, (_, _, lv_online) in lvs.items():
2388 inst = nv_dict.pop((node, lv_name), None)
2389 if (not lv_online and inst is not None
2390 and inst.name not in res_instances):
2391 res_instances.append(inst.name)
2393 # any leftover items in nv_dict are missing LVs, let's arrange the
2395 for key, inst in nv_dict.iteritems():
2396 if inst.name not in res_missing:
2397 res_missing[inst.name] = []
2398 res_missing[inst.name].append(key)
2403 class LURepairDiskSizes(NoHooksLU):
2404 """Verifies the cluster disks sizes.
2407 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2410 def ExpandNames(self):
2411 if self.op.instances:
2412 self.wanted_names = []
2413 for name in self.op.instances:
2414 full_name = _ExpandInstanceName(self.cfg, name)
2415 self.wanted_names.append(full_name)
2416 self.needed_locks = {
2417 locking.LEVEL_NODE: [],
2418 locking.LEVEL_INSTANCE: self.wanted_names,
2420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2422 self.wanted_names = None
2423 self.needed_locks = {
2424 locking.LEVEL_NODE: locking.ALL_SET,
2425 locking.LEVEL_INSTANCE: locking.ALL_SET,
2427 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2429 def DeclareLocks(self, level):
2430 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2431 self._LockInstancesNodes(primary_only=True)
2433 def CheckPrereq(self):
2434 """Check prerequisites.
2436 This only checks the optional instance list against the existing names.
2439 if self.wanted_names is None:
2440 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2442 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2443 in self.wanted_names]
2445 def _EnsureChildSizes(self, disk):
2446 """Ensure children of the disk have the needed disk size.
2448 This is valid mainly for DRBD8 and fixes an issue where the
2449     children have a smaller disk size.
2451 @param disk: an L{ganeti.objects.Disk} object
2454 if disk.dev_type == constants.LD_DRBD8:
2455 assert disk.children, "Empty children for DRBD8?"
2456 fchild = disk.children[0]
2457 mismatch = fchild.size < disk.size
2459 self.LogInfo("Child disk has size %d, parent %d, fixing",
2460 fchild.size, disk.size)
2461 fchild.size = disk.size
2463 # and we recurse on this child only, not on the metadev
2464 return self._EnsureChildSizes(fchild) or mismatch
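# Worked example (hedged): for a DRBD8 disk of size 10240 whose data child
# reports 10000, the child's recorded size is bumped to 10240 and True is
# returned so the caller knows the configuration needs to be saved.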
2468 def Exec(self, feedback_fn):
2469 """Verify the size of cluster disks.
2472 # TODO: check child disks too
2473 # TODO: check differences in size between primary/secondary nodes
2475 for instance in self.wanted_instances:
2476 pnode = instance.primary_node
2477 if pnode not in per_node_disks:
2478 per_node_disks[pnode] = []
2479 for idx, disk in enumerate(instance.disks):
2480 per_node_disks[pnode].append((instance, idx, disk))
2483 for node, dskl in per_node_disks.items():
2484 newl = [v[2].Copy() for v in dskl]
2486 self.cfg.SetDiskID(dsk, node)
2487 result = self.rpc.call_blockdev_getsize(node, newl)
2489 self.LogWarning("Failure in blockdev_getsize call to node"
2490 " %s, ignoring", node)
2492 if len(result.data) != len(dskl):
2493 self.LogWarning("Invalid result from node %s, ignoring node results",
2496 for ((instance, idx, disk), size) in zip(dskl, result.data):
2498 self.LogWarning("Disk %d of instance %s did not return size"
2499 " information, ignoring", idx, instance.name)
2501 if not isinstance(size, (int, long)):
2502 self.LogWarning("Disk %d of instance %s did not return valid"
2503 " size information, ignoring", idx, instance.name)
2506 if size != disk.size:
2507 self.LogInfo("Disk %d of instance %s has mismatched size,"
2508 " correcting: recorded %d, actual %d", idx,
2509 instance.name, disk.size, size)
2511 self.cfg.Update(instance, feedback_fn)
2512 changed.append((instance.name, idx, size))
2513 if self._EnsureChildSizes(disk):
2514 self.cfg.Update(instance, feedback_fn)
2515 changed.append((instance.name, idx, disk.size))
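# Descriptive note: 'changed' collects (instance_name, disk_index, new_size)
# triples, presumably handed back to the caller so it can report which
# disks were corrected.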
2519 class LURenameCluster(LogicalUnit):
2520 """Rename the cluster.
2523 HPATH = "cluster-rename"
2524 HTYPE = constants.HTYPE_CLUSTER
2525 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2527 def BuildHooksEnv(self):
2532 "OP_TARGET": self.cfg.GetClusterName(),
2533 "NEW_NAME": self.op.name,
2535 mn = self.cfg.GetMasterNode()
2536 all_nodes = self.cfg.GetNodeList()
2537 return env, [mn], all_nodes
2539 def CheckPrereq(self):
2540 """Verify that the passed name is a valid one.
2543 hostname = netutils.GetHostInfo(self.op.name)
2545 new_name = hostname.name
2546 self.ip = new_ip = hostname.ip
2547 old_name = self.cfg.GetClusterName()
2548 old_ip = self.cfg.GetMasterIP()
2549 if new_name == old_name and new_ip == old_ip:
2550 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2551 " cluster has changed",
2553 if new_ip != old_ip:
2554 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2555 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2556 " reachable on the network. Aborting." %
2557 new_ip, errors.ECODE_NOTUNIQUE)
2559 self.op.name = new_name
2561 def Exec(self, feedback_fn):
2562 """Rename the cluster.
2565 clustername = self.op.name
2568 # shutdown the master IP
2569 master = self.cfg.GetMasterNode()
2570 result = self.rpc.call_node_stop_master(master, False)
2571 result.Raise("Could not disable the master role")
2574 cluster = self.cfg.GetClusterInfo()
2575 cluster.cluster_name = clustername
2576 cluster.master_ip = ip
2577 self.cfg.Update(cluster, feedback_fn)
2579 # update the known hosts file
2580 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2581 node_list = self.cfg.GetNodeList()
2583 node_list.remove(master)
2586 result = self.rpc.call_upload_file(node_list,
2587 constants.SSH_KNOWN_HOSTS_FILE)
2588 for to_node, to_result in result.iteritems():
2589 msg = to_result.fail_msg
2591 msg = ("Copy of file %s to node %s failed: %s" %
2592 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2593 self.proc.LogWarning(msg)
2596 result = self.rpc.call_node_start_master(master, False, False)
2597 msg = result.fail_msg
2599 self.LogWarning("Could not re-enable the master role on"
2600 " the master, please restart manually: %s", msg)
2605 class LUSetClusterParams(LogicalUnit):
2606 """Change the parameters of the cluster.
2609 HPATH = "cluster-modify"
2610 HTYPE = constants.HTYPE_CLUSTER
2612 ("vg_name", None, _TMaybeString),
2613 ("enabled_hypervisors", None,
2614 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2615 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2616 ("beparams", None, _TOr(_TDict, _TNone)),
2617 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2618 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2619 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2620 ("uid_pool", None, _NoType),
2621 ("add_uids", None, _NoType),
2622 ("remove_uids", None, _NoType),
2623 ("maintain_node_health", None, _TMaybeBool),
2624 ("nicparams", None, _TOr(_TDict, _TNone)),
2625 ("drbd_helper", None, _TOr(_TString, _TNone)),
2626 ("default_iallocator", None, _TMaybeString),
2627 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2628 ("hidden_os", None, _TOr(_TListOf(\
2631 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2633 ("blacklisted_os", None, _TOr(_TListOf(\
2636 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2641 def CheckArguments(self):
2645 if self.op.uid_pool:
2646 uidpool.CheckUidPool(self.op.uid_pool)
2648 if self.op.add_uids:
2649 uidpool.CheckUidPool(self.op.add_uids)
2651 if self.op.remove_uids:
2652 uidpool.CheckUidPool(self.op.remove_uids)
2654 def ExpandNames(self):
2655 # FIXME: in the future maybe other cluster params won't require checking on
2656 # all nodes to be modified.
2657 self.needed_locks = {
2658 locking.LEVEL_NODE: locking.ALL_SET,
2660 self.share_locks[locking.LEVEL_NODE] = 1
2662 def BuildHooksEnv(self):
2667 "OP_TARGET": self.cfg.GetClusterName(),
2668 "NEW_VG_NAME": self.op.vg_name,
2670 mn = self.cfg.GetMasterNode()
2671 return env, [mn], [mn]
2673 def CheckPrereq(self):
2674 """Check prerequisites.
2676 This checks whether the given params don't conflict and
2677 if the given volume group is valid.
2680 if self.op.vg_name is not None and not self.op.vg_name:
2681 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2682 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2683 " instances exist", errors.ECODE_INVAL)
2685 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2686 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2687 raise errors.OpPrereqError("Cannot disable drbd helper while"
2688 " drbd-based instances exist",
2691 node_list = self.acquired_locks[locking.LEVEL_NODE]
2693     # if vg_name is not None, check the given volume group on all nodes
2695 vglist = self.rpc.call_vg_list(node_list)
2696 for node in node_list:
2697 msg = vglist[node].fail_msg
2699 # ignoring down node
2700 self.LogWarning("Error while gathering data on node %s"
2701 " (ignoring node): %s", node, msg)
2703 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2705 constants.MIN_VG_SIZE)
2707 raise errors.OpPrereqError("Error on node '%s': %s" %
2708 (node, vgstatus), errors.ECODE_ENVIRON)
2710 if self.op.drbd_helper:
2711 # checks given drbd helper on all nodes
2712 helpers = self.rpc.call_drbd_helper(node_list)
2713 for node in node_list:
2714 ninfo = self.cfg.GetNodeInfo(node)
2716 self.LogInfo("Not checking drbd helper on offline node %s", node)
2718 msg = helpers[node].fail_msg
2720 raise errors.OpPrereqError("Error checking drbd helper on node"
2721 " '%s': %s" % (node, msg),
2722 errors.ECODE_ENVIRON)
2723 node_helper = helpers[node].payload
2724 if node_helper != self.op.drbd_helper:
2725 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2726 (node, node_helper), errors.ECODE_ENVIRON)
2728 self.cluster = cluster = self.cfg.GetClusterInfo()
2729 # validate params changes
2730 if self.op.beparams:
2731 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2732 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2734 if self.op.nicparams:
2735 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2736 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2737 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2740 # check all instances for consistency
2741 for instance in self.cfg.GetAllInstancesInfo().values():
2742 for nic_idx, nic in enumerate(instance.nics):
2743 params_copy = copy.deepcopy(nic.nicparams)
2744 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2746 # check parameter syntax
2748 objects.NIC.CheckParameterSyntax(params_filled)
2749 except errors.ConfigurationError, err:
2750 nic_errors.append("Instance %s, nic/%d: %s" %
2751 (instance.name, nic_idx, err))
2753 # if we're moving instances to routed, check that they have an ip
2754 target_mode = params_filled[constants.NIC_MODE]
2755 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2756 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2757 (instance.name, nic_idx))
2759 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2760 "\n".join(nic_errors))
2762 # hypervisor list/parameters
2763 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2764 if self.op.hvparams:
2765 for hv_name, hv_dict in self.op.hvparams.items():
2766 if hv_name not in self.new_hvparams:
2767 self.new_hvparams[hv_name] = hv_dict
2769 self.new_hvparams[hv_name].update(hv_dict)
2771 # os hypervisor parameters
2772 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2774 for os_name, hvs in self.op.os_hvp.items():
2775 if os_name not in self.new_os_hvp:
2776 self.new_os_hvp[os_name] = hvs
2778 for hv_name, hv_dict in hvs.items():
2779 if hv_name not in self.new_os_hvp[os_name]:
2780 self.new_os_hvp[os_name][hv_name] = hv_dict
2782 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2785 self.new_osp = objects.FillDict(cluster.osparams, {})
2786 if self.op.osparams:
2787 for os_name, osp in self.op.osparams.items():
2788 if os_name not in self.new_osp:
2789 self.new_osp[os_name] = {}
2791 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2794 if not self.new_osp[os_name]:
2795 # we removed all parameters
2796 del self.new_osp[os_name]
2798 # check the parameter validity (remote check)
2799 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2800 os_name, self.new_osp[os_name])
2802 # changes to the hypervisor list
2803 if self.op.enabled_hypervisors is not None:
2804 self.hv_list = self.op.enabled_hypervisors
2805 for hv in self.hv_list:
2806 # if the hypervisor doesn't already exist in the cluster
2807 # hvparams, we initialize it to empty, and then (in both
2808 # cases) we make sure to fill the defaults, as we might not
2809 # have a complete defaults list if the hypervisor wasn't
2811 if hv not in new_hvp:
2813 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2814 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2816 self.hv_list = cluster.enabled_hypervisors
2818 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2819 # either the enabled list has changed, or the parameters have, validate
2820 for hv_name, hv_params in self.new_hvparams.items():
2821 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2822 (self.op.enabled_hypervisors and
2823 hv_name in self.op.enabled_hypervisors)):
2824 # either this is a new hypervisor, or its parameters have changed
2825 hv_class = hypervisor.GetHypervisor(hv_name)
2826 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827 hv_class.CheckParameterSyntax(hv_params)
2828 _CheckHVParams(self, node_list, hv_name, hv_params)
2831 # no need to check any newly-enabled hypervisors, since the
2832 # defaults have already been checked in the above code-block
2833 for os_name, os_hvp in self.new_os_hvp.items():
2834 for hv_name, hv_params in os_hvp.items():
2835 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2836 # we need to fill in the new os_hvp on top of the actual hv_p
2837 cluster_defaults = self.new_hvparams.get(hv_name, {})
2838 new_osp = objects.FillDict(cluster_defaults, hv_params)
2839 hv_class = hypervisor.GetHypervisor(hv_name)
2840 hv_class.CheckParameterSyntax(new_osp)
2841 _CheckHVParams(self, node_list, hv_name, new_osp)
2843 if self.op.default_iallocator:
2844 alloc_script = utils.FindFile(self.op.default_iallocator,
2845 constants.IALLOCATOR_SEARCH_PATH,
2847 if alloc_script is None:
2848 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2849 " specified" % self.op.default_iallocator,
2852 def Exec(self, feedback_fn):
2853 """Change the parameters of the cluster.
2856 if self.op.vg_name is not None:
2857 new_volume = self.op.vg_name
2860 if new_volume != self.cfg.GetVGName():
2861 self.cfg.SetVGName(new_volume)
2863 feedback_fn("Cluster LVM configuration already in desired"
2864 " state, not changing")
2865 if self.op.drbd_helper is not None:
2866 new_helper = self.op.drbd_helper
2869 if new_helper != self.cfg.GetDRBDHelper():
2870 self.cfg.SetDRBDHelper(new_helper)
2872 feedback_fn("Cluster DRBD helper already in desired state,"
2874 if self.op.hvparams:
2875 self.cluster.hvparams = self.new_hvparams
2877 self.cluster.os_hvp = self.new_os_hvp
2878 if self.op.enabled_hypervisors is not None:
2879 self.cluster.hvparams = self.new_hvparams
2880 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2881 if self.op.beparams:
2882 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2883 if self.op.nicparams:
2884 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2885 if self.op.osparams:
2886 self.cluster.osparams = self.new_osp
2888 if self.op.candidate_pool_size is not None:
2889 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2890 # we need to update the pool size here, otherwise the save will fail
2891 _AdjustCandidatePool(self, [])
2893 if self.op.maintain_node_health is not None:
2894 self.cluster.maintain_node_health = self.op.maintain_node_health
2896 if self.op.add_uids is not None:
2897 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2899 if self.op.remove_uids is not None:
2900 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2902 if self.op.uid_pool is not None:
2903 self.cluster.uid_pool = self.op.uid_pool
2905 if self.op.default_iallocator is not None:
2906 self.cluster.default_iallocator = self.op.default_iallocator
2908 if self.op.reserved_lvs is not None:
2909 self.cluster.reserved_lvs = self.op.reserved_lvs
2911 def helper_os(aname, mods, desc):
2913 lst = getattr(self.cluster, aname)
2914 for key, val in mods:
2915 if key == constants.DDM_ADD:
2917 feedback_fn("OS %s already in %s, ignoring", val, desc)
2920 elif key == constants.DDM_REMOVE:
2924 feedback_fn("OS %s not found in %s, ignoring", val, desc)
2926 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2928 if self.op.hidden_os:
2929 helper_os("hidden_os", self.op.hidden_os, "hidden")
2931 if self.op.blacklisted_os:
2932 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2934 self.cfg.Update(self.cluster, feedback_fn)
2937 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2938 """Distribute additional files which are part of the cluster configuration.
2940 ConfigWriter takes care of distributing the config and ssconf files, but
2941 there are more files which should be distributed to all nodes. This function
2942 makes sure those are copied.
2944 @param lu: calling logical unit
2945 @param additional_nodes: list of nodes not in the config to distribute to
2948 # 1. Gather target nodes
2949 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2950 dist_nodes = lu.cfg.GetOnlineNodeList()
2951 if additional_nodes is not None:
2952 dist_nodes.extend(additional_nodes)
2953 if myself.name in dist_nodes:
2954 dist_nodes.remove(myself.name)
2956 # 2. Gather files to distribute
2957 dist_files = set([constants.ETC_HOSTS,
2958 constants.SSH_KNOWN_HOSTS_FILE,
2959 constants.RAPI_CERT_FILE,
2960 constants.RAPI_USERS_FILE,
2961 constants.CONFD_HMAC_KEY,
2962 constants.CLUSTER_DOMAIN_SECRET_FILE,
2965 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2966 for hv_name in enabled_hypervisors:
2967 hv_class = hypervisor.GetHypervisor(hv_name)
2968 dist_files.update(hv_class.GetAncillaryFiles())
2970 # 3. Perform the files upload
2971 for fname in dist_files:
2972 if os.path.exists(fname):
2973 result = lu.rpc.call_upload_file(dist_nodes, fname)
2974 for to_node, to_result in result.items():
2975 msg = to_result.fail_msg
2977 msg = ("Copy of file %s to node %s failed: %s" %
2978 (fname, to_node, msg))
2979 lu.proc.LogWarning(msg)
2982 class LURedistributeConfig(NoHooksLU):
2983 """Force the redistribution of cluster configuration.
2985 This is a very simple LU.
2990 def ExpandNames(self):
2991 self.needed_locks = {
2992 locking.LEVEL_NODE: locking.ALL_SET,
2994 self.share_locks[locking.LEVEL_NODE] = 1
2996 def Exec(self, feedback_fn):
2997 """Redistribute the configuration.
3000 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3001 _RedistributeAncillaryFiles(self)
3004 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3005 """Sleep and poll for an instance's disk to sync.
3008   if not instance.disks or (disks is not None and not disks):
3011 disks = _ExpandCheckDisks(instance, disks)
3014 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3016 node = instance.primary_node
3019 lu.cfg.SetDiskID(dev, node)
3021 # TODO: Convert to utils.Retry
3024 degr_retries = 10 # in seconds, as we sleep 1 second each time
3028 cumul_degraded = False
3029 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3030 msg = rstats.fail_msg
3032 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3035 raise errors.RemoteError("Can't contact node %s for mirror data,"
3036 " aborting." % node)
3039 rstats = rstats.payload
3041 for i, mstat in enumerate(rstats):
3043 lu.LogWarning("Can't compute data for node %s/%s",
3044 node, disks[i].iv_name)
3047 cumul_degraded = (cumul_degraded or
3048 (mstat.is_degraded and mstat.sync_percent is None))
3049 if mstat.sync_percent is not None:
3051 if mstat.estimated_time is not None:
3052 rem_time = ("%s remaining (estimated)" %
3053 utils.FormatSeconds(mstat.estimated_time))
3054 max_time = mstat.estimated_time
3056 rem_time = "no time estimate"
3057 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3058 (disks[i].iv_name, mstat.sync_percent, rem_time))
3060 # if we're done but degraded, let's do a few small retries, to
3061 # make sure we see a stable and not transient situation; therefore
3062     # we force a restart of the loop
3063 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3064 logging.info("Degraded disks found, %d retries left", degr_retries)
3072 time.sleep(min(60, max_time))
3075 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3076 return not cumul_degraded
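# Descriptive note: the boolean returned above is True only when no disk was
# still reported as degraded on the last poll, which callers presumably use
# to distinguish a clean sync from one that finished degraded.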
3079 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3080 """Check that mirrors are not degraded.
3082 The ldisk parameter, if True, will change the test from the
3083 is_degraded attribute (which represents overall non-ok status for
3084 the device(s)) to the ldisk (representing the local storage status).
3087 lu.cfg.SetDiskID(dev, node)
3091 if on_primary or dev.AssembleOnSecondary():
3092 rstats = lu.rpc.call_blockdev_find(node, dev)
3093 msg = rstats.fail_msg
3095 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3097 elif not rstats.payload:
3098 lu.LogWarning("Can't find disk on node %s", node)
3102 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3104 result = result and not rstats.payload.is_degraded
3107 for child in dev.children:
3108 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3113 class LUDiagnoseOS(NoHooksLU):
3114 """Logical unit for OS diagnose/query.
3119 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3123 _BLK = "blacklisted"
3125 _FIELDS_STATIC = utils.FieldSet()
3126 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3127 "parameters", "api_versions", _HID, _BLK)
3129 def CheckArguments(self):
3131 raise errors.OpPrereqError("Selective OS query not supported",
3134 _CheckOutputFields(static=self._FIELDS_STATIC,
3135 dynamic=self._FIELDS_DYNAMIC,
3136 selected=self.op.output_fields)
3138 def ExpandNames(self):
3139 # Lock all nodes, in shared mode
3140 # Temporary removal of locks, should be reverted later
3141 # TODO: reintroduce locks when they are lighter-weight
3142 self.needed_locks = {}
3143 #self.share_locks[locking.LEVEL_NODE] = 1
3144 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3147 def _DiagnoseByOS(rlist):
3148 """Remaps a per-node return list into an a per-os per-node dictionary
3150 @param rlist: a map with node names as keys and OS objects as values
3153 @return: a dictionary with osnames as keys and as value another
3154 map, with nodes as keys and tuples of (path, status, diagnose,
3155 variants, parameters, api_versions) as values, eg::
3157 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3158 (/srv/..., False, "invalid api")],
3159 "node2": [(/srv/..., True, "", [], [])]}
3164 # we build here the list of nodes that didn't fail the RPC (at RPC
3165 # level), so that nodes with a non-responding node daemon don't
3166 # make all OSes invalid
3167 good_nodes = [node_name for node_name in rlist
3168 if not rlist[node_name].fail_msg]
3169 for node_name, nr in rlist.items():
3170 if nr.fail_msg or not nr.payload:
3172 for (name, path, status, diagnose, variants,
3173 params, api_versions) in nr.payload:
3174 if name not in all_os:
3175 # build a list of nodes for this os containing empty lists
3176 # for each node in node_list
3178 for nname in good_nodes:
3179 all_os[name][nname] = []
3180 # convert params from [name, help] to (name, help)
3181 params = [tuple(v) for v in params]
3182 all_os[name][node_name].append((path, status, diagnose,
3183 variants, params, api_versions))
3186 def Exec(self, feedback_fn):
3187 """Compute the list of OSes.
3190 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3191 node_data = self.rpc.call_os_diagnose(valid_nodes)
3192 pol = self._DiagnoseByOS(node_data)
3194 cluster = self.cfg.GetClusterInfo()
3196 for os_name in utils.NiceSort(pol.keys()):
3197 os_data = pol[os_name]
3200 (variants, params, api_versions) = null_state = (set(), set(), set())
3201 for idx, osl in enumerate(os_data.values()):
3202 valid = bool(valid and osl and osl[0][1])
3204 (variants, params, api_versions) = null_state
3206 node_variants, node_params, node_api = osl[0][3:6]
3207 if idx == 0: # first entry
3208 variants = set(node_variants)
3209 params = set(node_params)
3210 api_versions = set(node_api)
3211 else: # keep consistency
3212 variants.intersection_update(node_variants)
3213 params.intersection_update(node_params)
3214 api_versions.intersection_update(node_api)
3216 is_hid = os_name in cluster.hidden_os
3217 is_blk = os_name in cluster.blacklisted_os
3218 if ((self._HID not in self.op.output_fields and is_hid) or
3219 (self._BLK not in self.op.output_fields and is_blk) or
3220 (self._VLD not in self.op.output_fields and not valid)):
3223 for field in self.op.output_fields:
3226 elif field == self._VLD:
3228 elif field == "node_status":
3229 # this is just a copy of the dict
3231 for node_name, nos_list in os_data.items():
3232 val[node_name] = nos_list
3233 elif field == "variants":
3234 val = utils.NiceSort(list(variants))
3235 elif field == "parameters":
3237 elif field == "api_versions":
3238 val = list(api_versions)
3239 elif field == self._HID:
3241 elif field == self._BLK:
3244 raise errors.ParameterError(field)
3251 class LURemoveNode(LogicalUnit):
3252 """Logical unit for removing a node.
3255 HPATH = "node-remove"
3256 HTYPE = constants.HTYPE_NODE
3261 def BuildHooksEnv(self):
3264 This doesn't run on the target node in the pre phase as a failed
3265 node would then be impossible to remove.
3269 "OP_TARGET": self.op.node_name,
3270 "NODE_NAME": self.op.node_name,
3272 all_nodes = self.cfg.GetNodeList()
3274 all_nodes.remove(self.op.node_name)
3276 logging.warning("Node %s which is about to be removed not found"
3277 " in the all nodes list", self.op.node_name)
3278 return env, all_nodes, all_nodes
3280 def CheckPrereq(self):
3281 """Check prerequisites.
3284 - the node exists in the configuration
3285 - it does not have primary or secondary instances
3286 - it's not the master
3288 Any errors are signaled by raising errors.OpPrereqError.
3291 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3292 node = self.cfg.GetNodeInfo(self.op.node_name)
3293 assert node is not None
3295 instance_list = self.cfg.GetInstanceList()
3297 masternode = self.cfg.GetMasterNode()
3298 if node.name == masternode:
3299 raise errors.OpPrereqError("Node is the master node,"
3300 " you need to failover first.",
3303 for instance_name in instance_list:
3304 instance = self.cfg.GetInstanceInfo(instance_name)
3305 if node.name in instance.all_nodes:
3306 raise errors.OpPrereqError("Instance %s is still running on the node,"
3307 " please remove first." % instance_name,
3309 self.op.node_name = node.name
3312 def Exec(self, feedback_fn):
3313 """Removes the node from the cluster.
3317 logging.info("Stopping the node daemon and removing configs from node %s",
3320 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3322 # Promote nodes to master candidate as needed
3323 _AdjustCandidatePool(self, exceptions=[node.name])
3324 self.context.RemoveNode(node.name)
3326 # Run post hooks on the node before it's removed
3327 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3329 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3331 # pylint: disable-msg=W0702
3332 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3334 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3335 msg = result.fail_msg
3337 self.LogWarning("Errors encountered on the remote node while leaving"
3338 " the cluster: %s", msg)
3340 # Remove node from our /etc/hosts
3341 if self.cfg.GetClusterInfo().modify_etc_hosts:
3342 # FIXME: this should be done via an rpc call to node daemon
3343 utils.RemoveHostFromEtcHosts(node.name)
3344 _RedistributeAncillaryFiles(self)
3347 class LUQueryNodes(NoHooksLU):
3348 """Logical unit for querying nodes.
3351 # pylint: disable-msg=W0142
3354 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3355 ("use_locking", False, _TBool),
3359 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3360 "master_candidate", "offline", "drained"]
3362 _FIELDS_DYNAMIC = utils.FieldSet(
3364 "mtotal", "mnode", "mfree",
3366 "ctotal", "cnodes", "csockets",
3369 _FIELDS_STATIC = utils.FieldSet(*[
3370 "pinst_cnt", "sinst_cnt",
3371 "pinst_list", "sinst_list",
3372 "pip", "sip", "tags",
3374 "role"] + _SIMPLE_FIELDS
3377 def CheckArguments(self):
3378 _CheckOutputFields(static=self._FIELDS_STATIC,
3379 dynamic=self._FIELDS_DYNAMIC,
3380 selected=self.op.output_fields)
3382 def ExpandNames(self):
3383 self.needed_locks = {}
3384 self.share_locks[locking.LEVEL_NODE] = 1
3387 self.wanted = _GetWantedNodes(self, self.op.names)
3389 self.wanted = locking.ALL_SET
3391 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3392 self.do_locking = self.do_node_query and self.op.use_locking
3394 # if we don't request only static fields, we need to lock the nodes
3395 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3397 def Exec(self, feedback_fn):
3398 """Computes the list of nodes and their attributes.
3401 all_info = self.cfg.GetAllNodesInfo()
3403 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3404 elif self.wanted != locking.ALL_SET:
3405 nodenames = self.wanted
3406 missing = set(nodenames).difference(all_info.keys())
3408 raise errors.OpExecError(
3409 "Some nodes were removed before retrieving their data: %s" % missing)
3411 nodenames = all_info.keys()
3413 nodenames = utils.NiceSort(nodenames)
3414 nodelist = [all_info[name] for name in nodenames]
3416 # begin data gathering
3418 if self.do_node_query:
3420 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3421 self.cfg.GetHypervisorType())
3422 for name in nodenames:
3423 nodeinfo = node_data[name]
3424 if not nodeinfo.fail_msg and nodeinfo.payload:
3425 nodeinfo = nodeinfo.payload
3426 fn = utils.TryConvert
3428 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3429 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3430 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3431 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3432 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3433 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3434 "bootid": nodeinfo.get('bootid', None),
3435 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3436 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3439 live_data[name] = {}
3441 live_data = dict.fromkeys(nodenames, {})
3443 node_to_primary = dict([(name, set()) for name in nodenames])
3444 node_to_secondary = dict([(name, set()) for name in nodenames])
3446 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3447 "sinst_cnt", "sinst_list"))
3448 if inst_fields & frozenset(self.op.output_fields):
3449 inst_data = self.cfg.GetAllInstancesInfo()
3451 for inst in inst_data.values():
3452 if inst.primary_node in node_to_primary:
3453 node_to_primary[inst.primary_node].add(inst.name)
3454 for secnode in inst.secondary_nodes:
3455 if secnode in node_to_secondary:
3456 node_to_secondary[secnode].add(inst.name)
3458 master_node = self.cfg.GetMasterNode()
3460 # end data gathering
3463 for node in nodelist:
3465 for field in self.op.output_fields:
3466 if field in self._SIMPLE_FIELDS:
3467 val = getattr(node, field)
3468 elif field == "pinst_list":
3469 val = list(node_to_primary[node.name])
3470 elif field == "sinst_list":
3471 val = list(node_to_secondary[node.name])
3472 elif field == "pinst_cnt":
3473 val = len(node_to_primary[node.name])
3474 elif field == "sinst_cnt":
3475 val = len(node_to_secondary[node.name])
3476 elif field == "pip":
3477 val = node.primary_ip
3478 elif field == "sip":
3479 val = node.secondary_ip
3480 elif field == "tags":
3481 val = list(node.GetTags())
3482 elif field == "master":
3483 val = node.name == master_node
3484 elif self._FIELDS_DYNAMIC.Matches(field):
3485 val = live_data[node.name].get(field, None)
3486 elif field == "role":
3487 if node.name == master_node:
3489 elif node.master_candidate:
3498 raise errors.ParameterError(field)
3499 node_output.append(val)
3500 output.append(node_output)
3505 class LUQueryNodeVolumes(NoHooksLU):
3506 """Logical unit for getting volumes on node(s).
3510 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3511 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3514 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3515 _FIELDS_STATIC = utils.FieldSet("node")
3517 def CheckArguments(self):
3518 _CheckOutputFields(static=self._FIELDS_STATIC,
3519 dynamic=self._FIELDS_DYNAMIC,
3520 selected=self.op.output_fields)
3522 def ExpandNames(self):
3523 self.needed_locks = {}
3524 self.share_locks[locking.LEVEL_NODE] = 1
3525 if not self.op.nodes:
3526 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3528 self.needed_locks[locking.LEVEL_NODE] = \
3529 _GetWantedNodes(self, self.op.nodes)
3531 def Exec(self, feedback_fn):
3532 """Computes the list of nodes and their attributes.
3535 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3536 volumes = self.rpc.call_node_volumes(nodenames)
3538 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3539 in self.cfg.GetInstanceList()]
3541 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3544 for node in nodenames:
3545 nresult = volumes[node]
3548 msg = nresult.fail_msg
3550 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3553 node_vols = nresult.payload[:]
3554 node_vols.sort(key=lambda vol: vol['dev'])
3556 for vol in node_vols:
3558 for field in self.op.output_fields:
3561 elif field == "phys":
3565 elif field == "name":
3567 elif field == "size":
3568 val = int(float(vol['size']))
3569 elif field == "instance":
3571 if node not in lv_by_node[inst]:
3573 if vol['name'] in lv_by_node[inst][node]:
3579 raise errors.ParameterError(field)
3580 node_output.append(str(val))
3582 output.append(node_output)
3587 class LUQueryNodeStorage(NoHooksLU):
3588 """Logical unit for getting information on storage units on node(s).
3591 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3593 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3594 ("storage_type", _NoDefault, _CheckStorageType),
3595 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3596 ("name", None, _TMaybeString),
3600 def CheckArguments(self):
3601 _CheckOutputFields(static=self._FIELDS_STATIC,
3602 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3603 selected=self.op.output_fields)
3605 def ExpandNames(self):
3606 self.needed_locks = {}
3607 self.share_locks[locking.LEVEL_NODE] = 1
3610 self.needed_locks[locking.LEVEL_NODE] = \
3611 _GetWantedNodes(self, self.op.nodes)
3613 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3615 def Exec(self, feedback_fn):
3616 """Computes the list of nodes and their attributes.
3619 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3621 # Always get name to sort by
3622 if constants.SF_NAME in self.op.output_fields:
3623 fields = self.op.output_fields[:]
3625 fields = [constants.SF_NAME] + self.op.output_fields
3627 # Never ask for node or type as it's only known to the LU
3628 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3629 while extra in fields:
3630 fields.remove(extra)
3632 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3633 name_idx = field_idx[constants.SF_NAME]
3635 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3636 data = self.rpc.call_storage_list(self.nodes,
3637 self.op.storage_type, st_args,
3638 self.op.name, fields)
3642 for node in utils.NiceSort(self.nodes):
3643 nresult = data[node]
3647 msg = nresult.fail_msg
3649 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3652 rows = dict([(row[name_idx], row) for row in nresult.payload])
3654 for name in utils.NiceSort(rows.keys()):
3659 for field in self.op.output_fields:
3660 if field == constants.SF_NODE:
3662 elif field == constants.SF_TYPE:
3663 val = self.op.storage_type
3664 elif field in field_idx:
3665 val = row[field_idx[field]]
3667 raise errors.ParameterError(field)
3676 class LUModifyNodeStorage(NoHooksLU):
3677 """Logical unit for modifying a storage volume on a node.
3682 ("storage_type", _NoDefault, _CheckStorageType),
3683 ("name", _NoDefault, _TNonEmptyString),
3684 ("changes", _NoDefault, _TDict),
3688 def CheckArguments(self):
3689 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3691 storage_type = self.op.storage_type
3694 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3696 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3697 " modified" % storage_type,
3700 diff = set(self.op.changes.keys()) - modifiable
3702 raise errors.OpPrereqError("The following fields can not be modified for"
3703 " storage units of type '%s': %r" %
3704 (storage_type, list(diff)),
3707 def ExpandNames(self):
3708 self.needed_locks = {
3709 locking.LEVEL_NODE: self.op.node_name,
3712 def Exec(self, feedback_fn):
3713 """Computes the list of nodes and their attributes.
3716 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3717 result = self.rpc.call_storage_modify(self.op.node_name,
3718 self.op.storage_type, st_args,
3719 self.op.name, self.op.changes)
3720 result.Raise("Failed to modify storage unit '%s' on %s" %
3721 (self.op.name, self.op.node_name))
3724 class LUAddNode(LogicalUnit):
3725 """Logical unit for adding node to the cluster.
3729 HTYPE = constants.HTYPE_NODE
3732 ("primary_ip", None, _NoType),
3733 ("secondary_ip", None, _TMaybeString),
3734 ("readd", False, _TBool),
3737 def CheckArguments(self):
3738 # validate/normalize the node name
3739 self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3741 def BuildHooksEnv(self):
3744 This will run on all nodes before, and on all nodes + the new node after.
3748 "OP_TARGET": self.op.node_name,
3749 "NODE_NAME": self.op.node_name,
3750 "NODE_PIP": self.op.primary_ip,
3751 "NODE_SIP": self.op.secondary_ip,
3753 nodes_0 = self.cfg.GetNodeList()
3754 nodes_1 = nodes_0 + [self.op.node_name, ]
3755 return env, nodes_0, nodes_1
3757 def CheckPrereq(self):
3758 """Check prerequisites.
3761 - the new node is not already in the config
3763 - its parameters (single/dual homed) matches the cluster
3765 Any errors are signaled by raising errors.OpPrereqError.
3768 node_name = self.op.node_name
3771 dns_data = netutils.GetHostInfo(node_name)
3773 node = dns_data.name
3774 primary_ip = self.op.primary_ip = dns_data.ip
3775 if self.op.secondary_ip is None:
3776 self.op.secondary_ip = primary_ip
3777 if not netutils.IsValidIP4(self.op.secondary_ip):
3778 raise errors.OpPrereqError("Invalid secondary IP given",
3780 secondary_ip = self.op.secondary_ip
3782 node_list = cfg.GetNodeList()
3783 if not self.op.readd and node in node_list:
3784 raise errors.OpPrereqError("Node %s is already in the configuration" %
3785 node, errors.ECODE_EXISTS)
3786 elif self.op.readd and node not in node_list:
3787 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3790 self.changed_primary_ip = False
3792 for existing_node_name in node_list:
3793 existing_node = cfg.GetNodeInfo(existing_node_name)
3795 if self.op.readd and node == existing_node_name:
3796 if existing_node.secondary_ip != secondary_ip:
3797 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3798 " address configuration as before",
3800 if existing_node.primary_ip != primary_ip:
3801 self.changed_primary_ip = True
3805 if (existing_node.primary_ip == primary_ip or
3806 existing_node.secondary_ip == primary_ip or
3807 existing_node.primary_ip == secondary_ip or
3808 existing_node.secondary_ip == secondary_ip):
3809 raise errors.OpPrereqError("New node ip address(es) conflict with"
3810 " existing node %s" % existing_node.name,
3811 errors.ECODE_NOTUNIQUE)
3813 # check that the type of the node (single versus dual homed) is the
3814 # same as for the master
3815 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3816 master_singlehomed = myself.secondary_ip == myself.primary_ip
3817 newbie_singlehomed = secondary_ip == primary_ip
3818 if master_singlehomed != newbie_singlehomed:
3819 if master_singlehomed:
3820 raise errors.OpPrereqError("The master has no private ip but the"
3821 " new node has one",
3824 raise errors.OpPrereqError("The master has a private ip but the"
3825 " new node doesn't have one",
3828 # checks reachability
3829 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3830 raise errors.OpPrereqError("Node not reachable by ping",
3831 errors.ECODE_ENVIRON)
3833 if not newbie_singlehomed:
3834 # check reachability from my secondary ip to newbie's secondary ip
3835 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3836 source=myself.secondary_ip):
3837 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3838 " based ping to noded port",
3839 errors.ECODE_ENVIRON)
3846 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3849 self.new_node = self.cfg.GetNodeInfo(node)
3850 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3852 self.new_node = objects.Node(name=node,
3853 primary_ip=primary_ip,
3854 secondary_ip=secondary_ip,
3855 master_candidate=self.master_candidate,
3856 offline=False, drained=False)
3858 def Exec(self, feedback_fn):
3859 """Adds the new node to the cluster.
3862 new_node = self.new_node
3863 node = new_node.name
3865 # for re-adds, reset the offline/drained/master-candidate flags;
3866 # we need to reset here, otherwise offline would prevent RPC calls
3867 # later in the procedure; this also means that if the re-add
3868 # fails, we are left with a non-offlined, broken node
3870 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3871 self.LogInfo("Readding a node, the offline/drained flags were reset")
3872 # if we demote the node, we do cleanup later in the procedure
3873 new_node.master_candidate = self.master_candidate
3874 if self.changed_primary_ip:
3875 new_node.primary_ip = self.op.primary_ip
3877 # notify the user about any possible mc promotion
3878 if new_node.master_candidate:
3879 self.LogInfo("Node will be a master candidate")
3881 # check connectivity
3882 result = self.rpc.call_version([node])[node]
3883 result.Raise("Can't get version information from node %s" % node)
3884 if constants.PROTOCOL_VERSION == result.payload:
3885 logging.info("Communication to node %s fine, sw version %s match",
3886 node, result.payload)
3888 raise errors.OpExecError("Version mismatch master version %s,"
3889 " node version %s" %
3890 (constants.PROTOCOL_VERSION, result.payload))
3893 if self.cfg.GetClusterInfo().modify_ssh_setup:
3894 logging.info("Copy ssh key to node %s", node)
3895 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3897 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3898 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3902 keyarray.append(utils.ReadFile(i))
3904 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3905 keyarray[2], keyarray[3], keyarray[4],
3907 result.Raise("Cannot transfer ssh keys to the new node")
3909 # Add node to our /etc/hosts, and add key to known_hosts
3910 if self.cfg.GetClusterInfo().modify_etc_hosts:
3911 # FIXME: this should be done via an rpc call to node daemon
3912 utils.AddHostToEtcHosts(new_node.name)
3914 if new_node.secondary_ip != new_node.primary_ip:
3915 result = self.rpc.call_node_has_ip_address(new_node.name,
3916 new_node.secondary_ip)
3917 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3918 prereq=True, ecode=errors.ECODE_ENVIRON)
3919 if not result.payload:
3920 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3921 " you gave (%s). Please fix and re-run this"
3922 " command." % new_node.secondary_ip)
3924 node_verify_list = [self.cfg.GetMasterNode()]
3925 node_verify_param = {
3926 constants.NV_NODELIST: [node],
3927 # TODO: do a node-net-test as well?
3930 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3931 self.cfg.GetClusterName())
3932 for verifier in node_verify_list:
3933 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3934 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3936 for failed in nl_payload:
3937 feedback_fn("ssh/hostname verification failed"
3938 " (checking from %s): %s" %
3939 (verifier, nl_payload[failed]))
3940 raise errors.OpExecError("ssh/hostname verification failed.")
3943 _RedistributeAncillaryFiles(self)
3944 self.context.ReaddNode(new_node)
3945 # make sure we redistribute the config
3946 self.cfg.Update(new_node, feedback_fn)
3947 # and make sure the new node will not have old files around
3948 if not new_node.master_candidate:
3949 result = self.rpc.call_node_demote_from_mc(new_node.name)
3950 msg = result.fail_msg
3952 self.LogWarning("Node failed to demote itself from master"
3953 " candidate status: %s" % msg)
3955 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3956 self.context.AddNode(new_node, self.proc.GetECId())
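# Illustrative sketch, not part of the module: call_node_verify returns one
# result object per verifier node, and each payload maps NV_* keys to the
# per-check data.  For the NV_NODELIST check used above the entry is a dict
# of {failed_hostname: error_message}, so success means an empty dict:
#
#   nl_payload = result[verifier].payload[constants.NV_NODELIST]
#   if nl_payload:                       # at least one host failed
#     for failed in nl_payload:
#       feedback_fn("ssh/hostname verification failed (checking from %s): %s"
#                   % (verifier, nl_payload[failed]))
#     raise errors.OpExecError("ssh/hostname verification failed.")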
3959 class LUSetNodeParams(LogicalUnit):
3960 """Modifies the parameters of a node.
3963 HPATH = "node-modify"
3964 HTYPE = constants.HTYPE_NODE
3967 ("master_candidate", None, _TMaybeBool),
3968 ("offline", None, _TMaybeBool),
3969 ("drained", None, _TMaybeBool),
3970 ("auto_promote", False, _TBool),
3975 def CheckArguments(self):
3976 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3977 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3978 if all_mods.count(None) == 3:
3979 raise errors.OpPrereqError("Please pass at least one modification",
3981 if all_mods.count(True) > 1:
3982 raise errors.OpPrereqError("Can't set the node into more than one"
3983 " state at the same time",
3986 # Boolean value that tells us whether we're offlining or draining the node
3987 self.offline_or_drain = (self.op.offline == True or
3988 self.op.drained == True)
3989 self.deoffline_or_drain = (self.op.offline == False or
3990 self.op.drained == False)
3991 self.might_demote = (self.op.master_candidate == False or
3992 self.offline_or_drain)
3994 self.lock_all = self.op.auto_promote and self.might_demote
3997 def ExpandNames(self):
3999 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4001 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4003 def BuildHooksEnv(self):
4006 This runs on the master node.
4010 "OP_TARGET": self.op.node_name,
4011 "MASTER_CANDIDATE": str(self.op.master_candidate),
4012 "OFFLINE": str(self.op.offline),
4013 "DRAINED": str(self.op.drained),
4015 nl = [self.cfg.GetMasterNode(),
4019 def CheckPrereq(self):
4020 """Check prerequisites.
4022 This only checks the node's state against the requested flag changes.
4025 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4027 if (self.op.master_candidate is not None or
4028 self.op.drained is not None or
4029 self.op.offline is not None):
4030 # we can't change the master's node flags
4031 if self.op.node_name == self.cfg.GetMasterNode():
4032 raise errors.OpPrereqError("The master role can be changed"
4033 " only via master-failover",
4037 if node.master_candidate and self.might_demote and not self.lock_all:
4038 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4039 # check if after removing the current node, we're missing master
4041 (mc_remaining, mc_should, _) = \
4042 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4043 if mc_remaining < mc_should:
4044 raise errors.OpPrereqError("Not enough master candidates, please"
4045 " pass auto_promote to allow promotion",
4048 if (self.op.master_candidate == True and
4049 ((node.offline and not self.op.offline == False) or
4050 (node.drained and not self.op.drained == False))):
4051 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4052 " to master_candidate" % node.name,
4055 # If we're being deofflined/drained, we'll MC ourself if needed
4056 if (self.deoffline_or_drain and not self.offline_or_drain and not
4057 self.op.master_candidate == True and not node.master_candidate):
4058 self.op.master_candidate = _DecideSelfPromotion(self)
4059 if self.op.master_candidate:
4060 self.LogInfo("Autopromoting node to master candidate")
4064 def Exec(self, feedback_fn):
4073 if self.op.offline is not None:
4074 node.offline = self.op.offline
4075 result.append(("offline", str(self.op.offline)))
4076 if self.op.offline == True:
4077 if node.master_candidate:
4078 node.master_candidate = False
4080 result.append(("master_candidate", "auto-demotion due to offline"))
4082 node.drained = False
4083 result.append(("drained", "clear drained status due to offline"))
4085 if self.op.master_candidate is not None:
4086 node.master_candidate = self.op.master_candidate
4088 result.append(("master_candidate", str(self.op.master_candidate)))
4089 if self.op.master_candidate == False:
4090 rrc = self.rpc.call_node_demote_from_mc(node.name)
4093 self.LogWarning("Node failed to demote itself: %s" % msg)
4095 if self.op.drained is not None:
4096 node.drained = self.op.drained
4097 result.append(("drained", str(self.op.drained)))
4098 if self.op.drained == True:
4099 if node.master_candidate:
4100 node.master_candidate = False
4102 result.append(("master_candidate", "auto-demotion due to drain"))
4103 rrc = self.rpc.call_node_demote_from_mc(node.name)
4106 self.LogWarning("Node failed to demote itself: %s" % msg)
4108 node.offline = False
4109 result.append(("offline", "clear offline status due to drain"))
4111 # we locked all nodes, we adjust the CP before updating this node
4113 _AdjustCandidatePool(self, [node.name])
4115 # this will trigger configuration file update, if needed
4116 self.cfg.Update(node, feedback_fn)
4118 # this will trigger job queue propagation or cleanup
4120 self.context.ReaddNode(node)
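# Illustrative sketch, not part of the module: Exec accumulates a list of
# (parameter, new value) pairs which is returned as the job result, so a
# drain request on a master candidate would yield something like
#
#   [("drained", "True"),
#    ("master_candidate", "auto-demotion due to drain")]
#
# letting callers see both the requested change and any implied demotions.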
4125 class LUPowercycleNode(NoHooksLU):
4126 """Powercycles a node.
4135 def CheckArguments(self):
4136 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4137 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4138 raise errors.OpPrereqError("The node is the master and the force"
4139 " parameter was not set",
4142 def ExpandNames(self):
4143 """Locking for PowercycleNode.
4145 This is a last-resort option and shouldn't block on other
4146 jobs. Therefore, we grab no locks.
4149 self.needed_locks = {}
4151 def Exec(self, feedback_fn):
4155 result = self.rpc.call_node_powercycle(self.op.node_name,
4156 self.cfg.GetHypervisorType())
4157 result.Raise("Failed to schedule the reboot")
4158 return result.payload
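# Illustrative sketch, not part of the module: the RPC convention used above
# (and throughout this file) is "call, Raise() on failure, then use
# .payload".  A hypothetical stand-in result object would behave like:
#
#   class FakeResult(object):            # made-up class, for illustration
#     def __init__(self, fail_msg, payload):
#       self.fail_msg = fail_msg
#       self.payload = payload
#     def Raise(self, msg, **kwargs):
#       if self.fail_msg:
#         raise errors.OpExecError("%s: %s" % (msg, self.fail_msg))
#
#   res = FakeResult(None, "Reboot scheduled")
#   res.Raise("Failed to schedule the reboot")   # no-op on success
#   res.payload                                  # -> "Reboot scheduled"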
4161 class LUQueryClusterInfo(NoHooksLU):
4162 """Query cluster configuration.
4167 def ExpandNames(self):
4168 self.needed_locks = {}
4170 def Exec(self, feedback_fn):
4171 """Return cluster config.
4174 cluster = self.cfg.GetClusterInfo()
4177 # Filter just for enabled hypervisors
4178 for os_name, hv_dict in cluster.os_hvp.items():
4179 os_hvp[os_name] = {}
4180 for hv_name, hv_params in hv_dict.items():
4181 if hv_name in cluster.enabled_hypervisors:
4182 os_hvp[os_name][hv_name] = hv_params
4185 "software_version": constants.RELEASE_VERSION,
4186 "protocol_version": constants.PROTOCOL_VERSION,
4187 "config_version": constants.CONFIG_VERSION,
4188 "os_api_version": max(constants.OS_API_VERSIONS),
4189 "export_version": constants.EXPORT_VERSION,
4190 "architecture": (platform.architecture()[0], platform.machine()),
4191 "name": cluster.cluster_name,
4192 "master": cluster.master_node,
4193 "default_hypervisor": cluster.enabled_hypervisors[0],
4194 "enabled_hypervisors": cluster.enabled_hypervisors,
4195 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4196 for hypervisor_name in cluster.enabled_hypervisors]),
4198 "beparams": cluster.beparams,
4199 "osparams": cluster.osparams,
4200 "nicparams": cluster.nicparams,
4201 "candidate_pool_size": cluster.candidate_pool_size,
4202 "master_netdev": cluster.master_netdev,
4203 "volume_group_name": cluster.volume_group_name,
4204 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4205 "file_storage_dir": cluster.file_storage_dir,
4206 "maintain_node_health": cluster.maintain_node_health,
4207 "ctime": cluster.ctime,
4208 "mtime": cluster.mtime,
4209 "uuid": cluster.uuid,
4210 "tags": list(cluster.GetTags()),
4211 "uid_pool": cluster.uid_pool,
4212 "default_iallocator": cluster.default_iallocator,
4213 "reserved_lvs": cluster.reserved_lvs,
4219 class LUQueryConfigValues(NoHooksLU):
4220 """Return configuration values.
4223 _OP_PARAMS = [_POutputFields]
4225 _FIELDS_DYNAMIC = utils.FieldSet()
4226 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4227 "watcher_pause", "volume_group_name")
4229 def CheckArguments(self):
4230 _CheckOutputFields(static=self._FIELDS_STATIC,
4231 dynamic=self._FIELDS_DYNAMIC,
4232 selected=self.op.output_fields)
4234 def ExpandNames(self):
4235 self.needed_locks = {}
4237 def Exec(self, feedback_fn):
4238 """Dump a representation of the cluster config to the standard output.
4242 for field in self.op.output_fields:
4243 if field == "cluster_name":
4244 entry = self.cfg.GetClusterName()
4245 elif field == "master_node":
4246 entry = self.cfg.GetMasterNode()
4247 elif field == "drain_flag":
4248 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4249 elif field == "watcher_pause":
4250 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4251 elif field == "volume_group_name":
4252 entry = self.cfg.GetVGName()
4254 raise errors.ParameterError(field)
4255 values.append(entry)
4259 class LUActivateInstanceDisks(NoHooksLU):
4260 """Bring up an instance's disks.
4265 ("ignore_size", False, _TBool),
4269 def ExpandNames(self):
4270 self._ExpandAndLockInstance()
4271 self.needed_locks[locking.LEVEL_NODE] = []
4272 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4274 def DeclareLocks(self, level):
4275 if level == locking.LEVEL_NODE:
4276 self._LockInstancesNodes()
4278 def CheckPrereq(self):
4279 """Check prerequisites.
4281 This checks that the instance is in the cluster.
4284 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4285 assert self.instance is not None, \
4286 "Cannot retrieve locked instance %s" % self.op.instance_name
4287 _CheckNodeOnline(self, self.instance.primary_node)
4289 def Exec(self, feedback_fn):
4290 """Activate the disks.
4293 disks_ok, disks_info = \
4294 _AssembleInstanceDisks(self, self.instance,
4295 ignore_size=self.op.ignore_size)
4297 raise errors.OpExecError("Cannot activate block devices")
4302 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4304 """Prepare the block devices for an instance.
4306 This sets up the block devices on all nodes.
4308 @type lu: L{LogicalUnit}
4309 @param lu: the logical unit on whose behalf we execute
4310 @type instance: L{objects.Instance}
4311 @param instance: the instance for whose disks we assemble
4312 @type disks: list of L{objects.Disk} or None
4313 @param disks: which disks to assemble (or all, if None)
4314 @type ignore_secondaries: boolean
4315 @param ignore_secondaries: if true, errors on secondary nodes
4316 won't result in an error return from the function
4317 @type ignore_size: boolean
4318 @param ignore_size: if true, the current known size of the disk
4319 will not be used during the disk activation, useful for cases
4320 when the size is wrong
4321 @return: a tuple of (disks_ok, device_info); device_info is a list of
4322 (host, instance_visible_name, node_visible_name) tuples with the
4323 mapping from node devices to instance devices
4328 iname = instance.name
4329 disks = _ExpandCheckDisks(instance, disks)
4331 # With the two-pass mechanism we try to reduce the window of
4332 # opportunity for the race condition of switching DRBD to primary
4333 # before handshaking has occurred, but we do not eliminate it
4335 # The proper fix would be to wait (with some limits) until the
4336 # connection has been made and drbd transitions from WFConnection
4337 # into any other network-connected state (Connected, SyncTarget,
4340 # 1st pass, assemble on all nodes in secondary mode
4341 for inst_disk in disks:
4342 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4344 node_disk = node_disk.Copy()
4345 node_disk.UnsetSize()
4346 lu.cfg.SetDiskID(node_disk, node)
4347 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4348 msg = result.fail_msg
4350 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4351 " (is_primary=False, pass=1): %s",
4352 inst_disk.iv_name, node, msg)
4353 if not ignore_secondaries:
4356 # FIXME: race condition on drbd migration to primary
4358 # 2nd pass, do only the primary node
4359 for inst_disk in disks:
4362 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4363 if node != instance.primary_node:
4366 node_disk = node_disk.Copy()
4367 node_disk.UnsetSize()
4368 lu.cfg.SetDiskID(node_disk, node)
4369 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4370 msg = result.fail_msg
4372 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4373 " (is_primary=True, pass=2): %s",
4374 inst_disk.iv_name, node, msg)
4377 dev_path = result.payload
4379 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4381 # leave the disks configured for the primary node
4382 # this is a workaround that would be fixed better by
4383 # improving the logical/physical id handling
4385 lu.cfg.SetDiskID(disk, instance.primary_node)
4387 return disks_ok, device_info
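# Illustrative sketch, not part of the module: callers unpack the returned
# (disks_ok, device_info) tuple and abort when assembly failed, in the same
# spirit as _StartInstanceDisks below:
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in disks_info:
#     lu.LogInfo("Disk %s of the instance is visible on %s as %s",
#                iv_name, node, dev_path)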
4390 def _StartInstanceDisks(lu, instance, force):
4391 """Start the disks of an instance.
4394 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4395 ignore_secondaries=force)
4397 _ShutdownInstanceDisks(lu, instance)
4398 if force is not None and not force:
4399 lu.proc.LogWarning("", hint="If the message above refers to a"
4401 " you can retry the operation using '--force'.")
4402 raise errors.OpExecError("Disk consistency error")
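# Illustrative sketch, not part of the module: the "force" argument serves
# two purposes above.  It is forwarded unchanged to _AssembleInstanceDisks as
# ignore_secondaries, and it controls the hint printed on failure: None
# (used by reinstall/rename) means there is no user-visible --force option,
# so no hint is printed; False prints the "retry with --force" hint before
# raising:
#
#   _StartInstanceDisks(lu, instance, None)       # never print the hint
#   _StartInstanceDisks(lu, instance, op.force)   # hint only if force is False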
4405 class LUDeactivateInstanceDisks(NoHooksLU):
4406 """Shutdown an instance's disks.
4414 def ExpandNames(self):
4415 self._ExpandAndLockInstance()
4416 self.needed_locks[locking.LEVEL_NODE] = []
4417 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4419 def DeclareLocks(self, level):
4420 if level == locking.LEVEL_NODE:
4421 self._LockInstancesNodes()
4423 def CheckPrereq(self):
4424 """Check prerequisites.
4426 This checks that the instance is in the cluster.
4429 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4430 assert self.instance is not None, \
4431 "Cannot retrieve locked instance %s" % self.op.instance_name
4433 def Exec(self, feedback_fn):
4434 """Deactivate the disks
4437 instance = self.instance
4438 _SafeShutdownInstanceDisks(self, instance)
4441 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4442 """Shutdown block devices of an instance.
4444 This function checks if an instance is running, before calling
4445 _ShutdownInstanceDisks.
4448 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4449 _ShutdownInstanceDisks(lu, instance, disks=disks)
4452 def _ExpandCheckDisks(instance, disks):
4453 """Return the instance disks selected by the disks list
4455 @type disks: list of L{objects.Disk} or None
4456 @param disks: selected disks
4457 @rtype: list of L{objects.Disk}
4458 @return: selected instance disks to act on
4462 return instance.disks
4464 if not set(disks).issubset(instance.disks):
4465 raise errors.ProgrammerError("Can only act on disks belonging to the"
4470 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4471 """Shutdown block devices of an instance.
4473 This does the shutdown on all nodes of the instance.
4475 If ignore_primary is true, errors on the primary node are ignored.
4480 disks = _ExpandCheckDisks(instance, disks)
4483 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4484 lu.cfg.SetDiskID(top_disk, node)
4485 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4486 msg = result.fail_msg
4488 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4489 disk.iv_name, node, msg)
4490 if not ignore_primary or node != instance.primary_node:
4495 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4496 """Checks if a node has enough free memory.
4498 This function checks if a given node has the needed amount of free
4499 memory. In case the node has less memory or we cannot get the
4500 information from the node, this function raises an OpPrereqError
4503 @type lu: C{LogicalUnit}
4504 @param lu: a logical unit from which we get configuration data
4506 @param node: the node to check
4507 @type reason: C{str}
4508 @param reason: string to use in the error message
4509 @type requested: C{int}
4510 @param requested: the amount of memory in MiB to check for
4511 @type hypervisor_name: C{str}
4512 @param hypervisor_name: the hypervisor to ask for memory stats
4513 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4514 we cannot check the node
4517 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4518 nodeinfo[node].Raise("Can't get data from node %s" % node,
4519 prereq=True, ecode=errors.ECODE_ENVIRON)
4520 free_mem = nodeinfo[node].payload.get('memory_free', None)
4521 if not isinstance(free_mem, int):
4522 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4523 " was '%s'" % (node, free_mem),
4524 errors.ECODE_ENVIRON)
4525 if requested > free_mem:
4526 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4527 " needed %s MiB, available %s MiB" %
4528 (node, reason, requested, free_mem),
4532 def _CheckNodesFreeDisk(lu, nodenames, requested):
4533 """Checks if nodes have enough free disk space in the default VG.
4535 This function checks if all given nodes have the needed amount of
4536 free disk. In case any node has less disk or we cannot get the
4537 information from the node, this function raises an OpPrereqError
4540 @type lu: C{LogicalUnit}
4541 @param lu: a logical unit from which we get configuration data
4542 @type nodenames: C{list}
4543 @param nodenames: the list of node names to check
4544 @type requested: C{int}
4545 @param requested: the amount of disk in MiB to check for
4546 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4547 we cannot check the node
4550 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4551 lu.cfg.GetHypervisorType())
4552 for node in nodenames:
4553 info = nodeinfo[node]
4554 info.Raise("Cannot get current information from node %s" % node,
4555 prereq=True, ecode=errors.ECODE_ENVIRON)
4556 vg_free = info.payload.get("vg_free", None)
4557 if not isinstance(vg_free, int):
4558 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4559 " result was '%s'" % (node, vg_free),
4560 errors.ECODE_ENVIRON)
4561 if requested > vg_free:
4562 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4563 " required %d MiB, available %d MiB" %
4564 (node, requested, vg_free),
4568 class LUStartupInstance(LogicalUnit):
4569 """Starts an instance.
4572 HPATH = "instance-start"
4573 HTYPE = constants.HTYPE_INSTANCE
4577 ("hvparams", _EmptyDict, _TDict),
4578 ("beparams", _EmptyDict, _TDict),
4582 def CheckArguments(self):
4584 if self.op.beparams:
4585 # fill the beparams dict
4586 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4588 def ExpandNames(self):
4589 self._ExpandAndLockInstance()
4591 def BuildHooksEnv(self):
4594 This runs on master, primary and secondary nodes of the instance.
4598 "FORCE": self.op.force,
4600 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4601 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4604 def CheckPrereq(self):
4605 """Check prerequisites.
4607 This checks that the instance is in the cluster.
4610 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4611 assert self.instance is not None, \
4612 "Cannot retrieve locked instance %s" % self.op.instance_name
4615 if self.op.hvparams:
4616 # check hypervisor parameter syntax (locally)
4617 cluster = self.cfg.GetClusterInfo()
4618 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4619 filled_hvp = cluster.FillHV(instance)
4620 filled_hvp.update(self.op.hvparams)
4621 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4622 hv_type.CheckParameterSyntax(filled_hvp)
4623 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4625 _CheckNodeOnline(self, instance.primary_node)
4627 bep = self.cfg.GetClusterInfo().FillBE(instance)
4628 # check bridges existence
4629 _CheckInstanceBridgesExist(self, instance)
4631 remote_info = self.rpc.call_instance_info(instance.primary_node,
4633 instance.hypervisor)
4634 remote_info.Raise("Error checking node %s" % instance.primary_node,
4635 prereq=True, ecode=errors.ECODE_ENVIRON)
4636 if not remote_info.payload: # not running already
4637 _CheckNodeFreeMemory(self, instance.primary_node,
4638 "starting instance %s" % instance.name,
4639 bep[constants.BE_MEMORY], instance.hypervisor)
4641 def Exec(self, feedback_fn):
4642 """Start the instance.
4645 instance = self.instance
4646 force = self.op.force
4648 self.cfg.MarkInstanceUp(instance.name)
4650 node_current = instance.primary_node
4652 _StartInstanceDisks(self, instance, force)
4654 result = self.rpc.call_instance_start(node_current, instance,
4655 self.op.hvparams, self.op.beparams)
4656 msg = result.fail_msg
4658 _ShutdownInstanceDisks(self, instance)
4659 raise errors.OpExecError("Could not start instance: %s" % msg)
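# Illustrative sketch, not part of the module: the hvparams/beparams opcode
# fields are one-off overrides for this start only.  CheckPrereq validates
# them against the fully filled dictionaries, while Exec forwards just the
# raw overrides to the node; e.g. (values made up):
#
#   op.beparams = {constants.BE_MEMORY: 2048}
#   op.hvparams = {}
#   # -> call_instance_start(node_current, instance, {}, {"memory": 2048})
#
# The overrides apply to this start only; they are not written back to the
# instance's configuration.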
4662 class LURebootInstance(LogicalUnit):
4663 """Reboot an instance.
4666 HPATH = "instance-reboot"
4667 HTYPE = constants.HTYPE_INSTANCE
4670 ("ignore_secondaries", False, _TBool),
4671 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4676 def ExpandNames(self):
4677 self._ExpandAndLockInstance()
4679 def BuildHooksEnv(self):
4682 This runs on master, primary and secondary nodes of the instance.
4686 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4687 "REBOOT_TYPE": self.op.reboot_type,
4688 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4690 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4691 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4694 def CheckPrereq(self):
4695 """Check prerequisites.
4697 This checks that the instance is in the cluster.
4700 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4701 assert self.instance is not None, \
4702 "Cannot retrieve locked instance %s" % self.op.instance_name
4704 _CheckNodeOnline(self, instance.primary_node)
4706 # check bridges existence
4707 _CheckInstanceBridgesExist(self, instance)
4709 def Exec(self, feedback_fn):
4710 """Reboot the instance.
4713 instance = self.instance
4714 ignore_secondaries = self.op.ignore_secondaries
4715 reboot_type = self.op.reboot_type
4717 node_current = instance.primary_node
4719 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4720 constants.INSTANCE_REBOOT_HARD]:
4721 for disk in instance.disks:
4722 self.cfg.SetDiskID(disk, node_current)
4723 result = self.rpc.call_instance_reboot(node_current, instance,
4725 self.op.shutdown_timeout)
4726 result.Raise("Could not reboot instance")
4728 result = self.rpc.call_instance_shutdown(node_current, instance,
4729 self.op.shutdown_timeout)
4730 result.Raise("Could not shutdown instance for full reboot")
4731 _ShutdownInstanceDisks(self, instance)
4732 _StartInstanceDisks(self, instance, ignore_secondaries)
4733 result = self.rpc.call_instance_start(node_current, instance, None, None)
4734 msg = result.fail_msg
4736 _ShutdownInstanceDisks(self, instance)
4737 raise errors.OpExecError("Could not start instance for"
4738 " full reboot: %s" % msg)
4740 self.cfg.MarkInstanceUp(instance.name)
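# Illustrative sketch, not part of the module: the reboot types take
# different paths above.  Soft and hard reboots are a single RPC (the reboot
# type is forwarded to the node daemon, which handles the distinction), while
# any other allowed type (the full reboot) is emulated from the master side:
#
#   INSTANCE_REBOOT_SOFT / INSTANCE_REBOOT_HARD -> rpc.call_instance_reboot
#   full reboot                                 -> call_instance_shutdown,
#                                                  _ShutdownInstanceDisks,
#                                                  _StartInstanceDisks,
#                                                  call_instance_start
#
# Only the full reboot therefore touches the block devices.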
4743 class LUShutdownInstance(LogicalUnit):
4744 """Shutdown an instance.
4747 HPATH = "instance-stop"
4748 HTYPE = constants.HTYPE_INSTANCE
4751 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4755 def ExpandNames(self):
4756 self._ExpandAndLockInstance()
4758 def BuildHooksEnv(self):
4761 This runs on master, primary and secondary nodes of the instance.
4764 env = _BuildInstanceHookEnvByObject(self, self.instance)
4765 env["TIMEOUT"] = self.op.timeout
4766 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4769 def CheckPrereq(self):
4770 """Check prerequisites.
4772 This checks that the instance is in the cluster.
4775 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4776 assert self.instance is not None, \
4777 "Cannot retrieve locked instance %s" % self.op.instance_name
4778 _CheckNodeOnline(self, self.instance.primary_node)
4780 def Exec(self, feedback_fn):
4781 """Shutdown the instance.
4784 instance = self.instance
4785 node_current = instance.primary_node
4786 timeout = self.op.timeout
4787 self.cfg.MarkInstanceDown(instance.name)
4788 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4789 msg = result.fail_msg
4791 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4793 _ShutdownInstanceDisks(self, instance)
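# Illustrative sketch, not part of the module: note the ordering above.  The
# instance is marked down in the configuration before the shutdown RPC, and
# an RPC failure only produces a warning, so the disk deactivation runs in
# every case:
#
#   self.cfg.MarkInstanceDown(instance.name)   # config first
#   result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
#   if result.fail_msg:
#     self.proc.LogWarning("Could not shutdown instance: %s" % msg)
#   _ShutdownInstanceDisks(self, instance)     # always reached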
4796 class LUReinstallInstance(LogicalUnit):
4797 """Reinstall an instance.
4800 HPATH = "instance-reinstall"
4801 HTYPE = constants.HTYPE_INSTANCE
4804 ("os_type", None, _TMaybeString),
4805 ("force_variant", False, _TBool),
4809 def ExpandNames(self):
4810 self._ExpandAndLockInstance()
4812 def BuildHooksEnv(self):
4815 This runs on master, primary and secondary nodes of the instance.
4818 env = _BuildInstanceHookEnvByObject(self, self.instance)
4819 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4822 def CheckPrereq(self):
4823 """Check prerequisites.
4825 This checks that the instance is in the cluster and is not running.
4828 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4829 assert instance is not None, \
4830 "Cannot retrieve locked instance %s" % self.op.instance_name
4831 _CheckNodeOnline(self, instance.primary_node)
4833 if instance.disk_template == constants.DT_DISKLESS:
4834 raise errors.OpPrereqError("Instance '%s' has no disks" %
4835 self.op.instance_name,
4837 _CheckInstanceDown(self, instance, "cannot reinstall")
4839 if self.op.os_type is not None:
4841 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4842 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4844 self.instance = instance
4846 def Exec(self, feedback_fn):
4847 """Reinstall the instance.
4850 inst = self.instance
4852 if self.op.os_type is not None:
4853 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4854 inst.os = self.op.os_type
4855 self.cfg.Update(inst, feedback_fn)
4857 _StartInstanceDisks(self, inst, None)
4859 feedback_fn("Running the instance OS create scripts...")
4860 # FIXME: pass debug option from opcode to backend
4861 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4862 self.op.debug_level)
4863 result.Raise("Could not install OS for instance %s on node %s" %
4864 (inst.name, inst.primary_node))
4866 _ShutdownInstanceDisks(self, inst)
4869 class LURecreateInstanceDisks(LogicalUnit):
4870 """Recreate an instance's missing disks.
4873 HPATH = "instance-recreate-disks"
4874 HTYPE = constants.HTYPE_INSTANCE
4877 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4881 def ExpandNames(self):
4882 self._ExpandAndLockInstance()
4884 def BuildHooksEnv(self):
4887 This runs on master, primary and secondary nodes of the instance.
4890 env = _BuildInstanceHookEnvByObject(self, self.instance)
4891 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4894 def CheckPrereq(self):
4895 """Check prerequisites.
4897 This checks that the instance is in the cluster and is not running.
4900 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4901 assert instance is not None, \
4902 "Cannot retrieve locked instance %s" % self.op.instance_name
4903 _CheckNodeOnline(self, instance.primary_node)
4905 if instance.disk_template == constants.DT_DISKLESS:
4906 raise errors.OpPrereqError("Instance '%s' has no disks" %
4907 self.op.instance_name, errors.ECODE_INVAL)
4908 _CheckInstanceDown(self, instance, "cannot recreate disks")
4910 if not self.op.disks:
4911 self.op.disks = range(len(instance.disks))
4913 for idx in self.op.disks:
4914 if idx >= len(instance.disks):
4915 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4918 self.instance = instance
4920 def Exec(self, feedback_fn):
4921 """Recreate the disks.
4925 for idx, _ in enumerate(self.instance.disks):
4926 if idx not in self.op.disks: # disk idx has not been passed in
4930 _CreateDisks(self, self.instance, to_skip=to_skip)
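# Illustrative sketch, not part of the module: to_skip is the complement of
# op.disks, so only the requested indices are recreated.  For a three-disk
# instance and op.disks = [1]:
#
#   to_skip = [idx for idx, _ in enumerate(instance.disks)
#              if idx not in op.disks]          # -> [0, 2]
#   _CreateDisks(self, instance, to_skip=to_skip)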
4933 class LURenameInstance(LogicalUnit):
4934 """Rename an instance.
4937 HPATH = "instance-rename"
4938 HTYPE = constants.HTYPE_INSTANCE
4941 ("new_name", _NoDefault, _TNonEmptyString),
4942 ("ip_check", False, _TBool),
4943 ("name_check", True, _TBool),
4946 def CheckArguments(self):
4950 if self.op.ip_check and not self.op.name_check:
4951 # TODO: make the ip check more flexible and not depend on the name check
4952 raise errors.OpPrereqError("Cannot do ip check without a name check",
4955 def BuildHooksEnv(self):
4958 This runs on master, primary and secondary nodes of the instance.
4961 env = _BuildInstanceHookEnvByObject(self, self.instance)
4962 env["INSTANCE_NEW_NAME"] = self.op.new_name
4963 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4966 def CheckPrereq(self):
4967 """Check prerequisites.
4969 This checks that the instance is in the cluster and is not running.
4972 self.op.instance_name = _ExpandInstanceName(self.cfg,
4973 self.op.instance_name)
4974 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4975 assert instance is not None
4976 _CheckNodeOnline(self, instance.primary_node)
4977 _CheckInstanceDown(self, instance, "cannot rename")
4978 self.instance = instance
4980 new_name = self.op.new_name
4981 if self.op.name_check:
4982 hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
4983 new_name = self.op.new_name = hostinfo.name
4984 if (self.op.ip_check and
4985 netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
4986 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4987 (hostinfo.ip, new_name),
4988 errors.ECODE_NOTUNIQUE)
4990 instance_list = self.cfg.GetInstanceList()
4991 if new_name in instance_list:
4992 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4993 new_name, errors.ECODE_EXISTS)
4996 def Exec(self, feedback_fn):
4997 """Reinstall the instance.
5000 inst = self.instance
5001 old_name = inst.name
5003 if inst.disk_template == constants.DT_FILE:
5004 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5006 self.cfg.RenameInstance(inst.name, self.op.new_name)
5007 # Change the instance lock. This is definitely safe while we hold the BGL
5008 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5009 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5011 # re-read the instance from the configuration after rename
5012 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5014 if inst.disk_template == constants.DT_FILE:
5015 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5016 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5017 old_file_storage_dir,
5018 new_file_storage_dir)
5019 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5020 " (but the instance has been renamed in Ganeti)" %
5021 (inst.primary_node, old_file_storage_dir,
5022 new_file_storage_dir))
5024 _StartInstanceDisks(self, inst, None)
5026 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5027 old_name, self.op.debug_level)
5028 msg = result.fail_msg
5030 msg = ("Could not run OS rename script for instance %s on node %s"
5031 " (but the instance has been renamed in Ganeti): %s" %
5032 (inst.name, inst.primary_node, msg))
5033 self.proc.LogWarning(msg)
5035 _ShutdownInstanceDisks(self, inst)
5040 class LURemoveInstance(LogicalUnit):
5041 """Remove an instance.
5044 HPATH = "instance-remove"
5045 HTYPE = constants.HTYPE_INSTANCE
5048 ("ignore_failures", False, _TBool),
5053 def ExpandNames(self):
5054 self._ExpandAndLockInstance()
5055 self.needed_locks[locking.LEVEL_NODE] = []
5056 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5058 def DeclareLocks(self, level):
5059 if level == locking.LEVEL_NODE:
5060 self._LockInstancesNodes()
5062 def BuildHooksEnv(self):
5065 This runs on master, primary and secondary nodes of the instance.
5068 env = _BuildInstanceHookEnvByObject(self, self.instance)
5069 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5070 nl = [self.cfg.GetMasterNode()]
5071 nl_post = list(self.instance.all_nodes) + nl
5072 return env, nl, nl_post
5074 def CheckPrereq(self):
5075 """Check prerequisites.
5077 This checks that the instance is in the cluster.
5080 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5081 assert self.instance is not None, \
5082 "Cannot retrieve locked instance %s" % self.op.instance_name
5084 def Exec(self, feedback_fn):
5085 """Remove the instance.
5088 instance = self.instance
5089 logging.info("Shutting down instance %s on node %s",
5090 instance.name, instance.primary_node)
5092 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5093 self.op.shutdown_timeout)
5094 msg = result.fail_msg
5096 if self.op.ignore_failures:
5097 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5099 raise errors.OpExecError("Could not shutdown instance %s on"
5101 (instance.name, instance.primary_node, msg))
5103 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5106 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5107 """Utility function to remove an instance.
5110 logging.info("Removing block devices for instance %s", instance.name)
5112 if not _RemoveDisks(lu, instance):
5113 if not ignore_failures:
5114 raise errors.OpExecError("Can't remove instance's disks")
5115 feedback_fn("Warning: can't remove instance's disks")
5117 logging.info("Removing instance %s out of cluster config", instance.name)
5119 lu.cfg.RemoveInstance(instance.name)
5121 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5122 "Instance lock removal conflict"
5124 # Remove lock for the instance
5125 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5128 class LUQueryInstances(NoHooksLU):
5129 """Logical unit for querying instances.
5132 # pylint: disable-msg=W0142
5134 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5135 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5136 ("use_locking", False, _TBool),
5139 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5140 "serial_no", "ctime", "mtime", "uuid"]
5141 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5143 "disk_template", "ip", "mac", "bridge",
5144 "nic_mode", "nic_link",
5145 "sda_size", "sdb_size", "vcpus", "tags",
5146 "network_port", "beparams",
5147 r"(disk)\.(size)/([0-9]+)",
5148 r"(disk)\.(sizes)", "disk_usage",
5149 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5150 r"(nic)\.(bridge)/([0-9]+)",
5151 r"(nic)\.(macs|ips|modes|links|bridges)",
5152 r"(disk|nic)\.(count)",
5154 ] + _SIMPLE_FIELDS +
5156 for name in constants.HVS_PARAMETERS
5157 if name not in constants.HVC_GLOBALS] +
5159 for name in constants.BES_PARAMETERS])
5160 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5166 def CheckArguments(self):
5167 _CheckOutputFields(static=self._FIELDS_STATIC,
5168 dynamic=self._FIELDS_DYNAMIC,
5169 selected=self.op.output_fields)
5171 def ExpandNames(self):
5172 self.needed_locks = {}
5173 self.share_locks[locking.LEVEL_INSTANCE] = 1
5174 self.share_locks[locking.LEVEL_NODE] = 1
5177 self.wanted = _GetWantedInstances(self, self.op.names)
5179 self.wanted = locking.ALL_SET
5181 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5182 self.do_locking = self.do_node_query and self.op.use_locking
5184 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5185 self.needed_locks[locking.LEVEL_NODE] = []
5186 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5188 def DeclareLocks(self, level):
5189 if level == locking.LEVEL_NODE and self.do_locking:
5190 self._LockInstancesNodes()
5192 def Exec(self, feedback_fn):
5193 """Computes the list of nodes and their attributes.
5196 # pylint: disable-msg=R0912
5197 # way too many branches here
5198 all_info = self.cfg.GetAllInstancesInfo()
5199 if self.wanted == locking.ALL_SET:
5200 # caller didn't specify instance names, so ordering is not important
5202 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5204 instance_names = all_info.keys()
5205 instance_names = utils.NiceSort(instance_names)
5207 # caller did specify names, so we must keep the ordering
5209 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5211 tgt_set = all_info.keys()
5212 missing = set(self.wanted).difference(tgt_set)
5214 raise errors.OpExecError("Some instances were removed before"
5215 " retrieving their data: %s" % missing)
5216 instance_names = self.wanted
5218 instance_list = [all_info[iname] for iname in instance_names]
5220 # begin data gathering
5222 nodes = frozenset([inst.primary_node for inst in instance_list])
5223 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5227 if self.do_node_query:
5229 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5231 result = node_data[name]
5233 # offline nodes will be in both lists
5234 off_nodes.append(name)
5236 bad_nodes.append(name)
5239 live_data.update(result.payload)
5240 # else no instance is alive
5242 live_data = dict([(name, {}) for name in instance_names])
5244 # end data gathering
5249 cluster = self.cfg.GetClusterInfo()
5250 for instance in instance_list:
5252 i_hv = cluster.FillHV(instance, skip_globals=True)
5253 i_be = cluster.FillBE(instance)
5254 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5255 for field in self.op.output_fields:
5256 st_match = self._FIELDS_STATIC.Matches(field)
5257 if field in self._SIMPLE_FIELDS:
5258 val = getattr(instance, field)
5259 elif field == "pnode":
5260 val = instance.primary_node
5261 elif field == "snodes":
5262 val = list(instance.secondary_nodes)
5263 elif field == "admin_state":
5264 val = instance.admin_up
5265 elif field == "oper_state":
5266 if instance.primary_node in bad_nodes:
5269 val = bool(live_data.get(instance.name))
5270 elif field == "status":
5271 if instance.primary_node in off_nodes:
5272 val = "ERROR_nodeoffline"
5273 elif instance.primary_node in bad_nodes:
5274 val = "ERROR_nodedown"
5276 running = bool(live_data.get(instance.name))
5278 if instance.admin_up:
5283 if instance.admin_up:
5287 elif field == "oper_ram":
5288 if instance.primary_node in bad_nodes:
5290 elif instance.name in live_data:
5291 val = live_data[instance.name].get("memory", "?")
5294 elif field == "oper_vcpus":
5295 if instance.primary_node in bad_nodes:
5297 elif instance.name in live_data:
5298 val = live_data[instance.name].get("vcpus", "?")
5301 elif field == "vcpus":
5302 val = i_be[constants.BE_VCPUS]
5303 elif field == "disk_template":
5304 val = instance.disk_template
5307 val = instance.nics[0].ip
5310 elif field == "nic_mode":
5312 val = i_nicp[0][constants.NIC_MODE]
5315 elif field == "nic_link":
5317 val = i_nicp[0][constants.NIC_LINK]
5320 elif field == "bridge":
5321 if (instance.nics and
5322 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5323 val = i_nicp[0][constants.NIC_LINK]
5326 elif field == "mac":
5328 val = instance.nics[0].mac
5331 elif field == "sda_size" or field == "sdb_size":
5332 idx = ord(field[2]) - ord('a')
5334 val = instance.FindDisk(idx).size
5335 except errors.OpPrereqError:
5337 elif field == "disk_usage": # total disk usage per node
5338 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5339 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5340 elif field == "tags":
5341 val = list(instance.GetTags())
5342 elif field == "hvparams":
5344 elif (field.startswith(HVPREFIX) and
5345 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5346 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5347 val = i_hv.get(field[len(HVPREFIX):], None)
5348 elif field == "beparams":
5350 elif (field.startswith(BEPREFIX) and
5351 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5352 val = i_be.get(field[len(BEPREFIX):], None)
5353 elif st_match and st_match.groups():
5354 # matches a variable list
5355 st_groups = st_match.groups()
5356 if st_groups and st_groups[0] == "disk":
5357 if st_groups[1] == "count":
5358 val = len(instance.disks)
5359 elif st_groups[1] == "sizes":
5360 val = [disk.size for disk in instance.disks]
5361 elif st_groups[1] == "size":
5363 val = instance.FindDisk(st_groups[2]).size
5364 except errors.OpPrereqError:
5367 assert False, "Unhandled disk parameter"
5368 elif st_groups[0] == "nic":
5369 if st_groups[1] == "count":
5370 val = len(instance.nics)
5371 elif st_groups[1] == "macs":
5372 val = [nic.mac for nic in instance.nics]
5373 elif st_groups[1] == "ips":
5374 val = [nic.ip for nic in instance.nics]
5375 elif st_groups[1] == "modes":
5376 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5377 elif st_groups[1] == "links":
5378 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5379 elif st_groups[1] == "bridges":
5382 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5383 val.append(nicp[constants.NIC_LINK])
5388 nic_idx = int(st_groups[2])
5389 if nic_idx >= len(instance.nics):
5392 if st_groups[1] == "mac":
5393 val = instance.nics[nic_idx].mac
5394 elif st_groups[1] == "ip":
5395 val = instance.nics[nic_idx].ip
5396 elif st_groups[1] == "mode":
5397 val = i_nicp[nic_idx][constants.NIC_MODE]
5398 elif st_groups[1] == "link":
5399 val = i_nicp[nic_idx][constants.NIC_LINK]
5400 elif st_groups[1] == "bridge":
5401 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5402 if nic_mode == constants.NIC_MODE_BRIDGED:
5403 val = i_nicp[nic_idx][constants.NIC_LINK]
5407 assert False, "Unhandled NIC parameter"
5409 assert False, ("Declared but unhandled variable parameter '%s'" %
5412 assert False, "Declared but unhandled parameter '%s'" % field
5419 class LUFailoverInstance(LogicalUnit):
5420 """Failover an instance.
5423 HPATH = "instance-failover"
5424 HTYPE = constants.HTYPE_INSTANCE
5427 ("ignore_consistency", False, _TBool),
5432 def ExpandNames(self):
5433 self._ExpandAndLockInstance()
5434 self.needed_locks[locking.LEVEL_NODE] = []
5435 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5437 def DeclareLocks(self, level):
5438 if level == locking.LEVEL_NODE:
5439 self._LockInstancesNodes()
5441 def BuildHooksEnv(self):
5444 This runs on master, primary and secondary nodes of the instance.
5447 instance = self.instance
5448 source_node = instance.primary_node
5449 target_node = instance.secondary_nodes[0]
5451 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5452 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5453 "OLD_PRIMARY": source_node,
5454 "OLD_SECONDARY": target_node,
5455 "NEW_PRIMARY": target_node,
5456 "NEW_SECONDARY": source_node,
5458 env.update(_BuildInstanceHookEnvByObject(self, instance))
5459 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5461 nl_post.append(source_node)
5462 return env, nl, nl_post
5464 def CheckPrereq(self):
5465 """Check prerequisites.
5467 This checks that the instance is in the cluster.
5470 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5471 assert self.instance is not None, \
5472 "Cannot retrieve locked instance %s" % self.op.instance_name
5474 bep = self.cfg.GetClusterInfo().FillBE(instance)
5475 if instance.disk_template not in constants.DTS_NET_MIRROR:
5476 raise errors.OpPrereqError("Instance's disk layout is not"
5477 " network mirrored, cannot failover.",
5480 secondary_nodes = instance.secondary_nodes
5481 if not secondary_nodes:
5482 raise errors.ProgrammerError("no secondary node but using "
5483 "a mirrored disk template")
5485 target_node = secondary_nodes[0]
5486 _CheckNodeOnline(self, target_node)
5487 _CheckNodeNotDrained(self, target_node)
5488 if instance.admin_up:
5489 # check memory requirements on the secondary node
5490 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5491 instance.name, bep[constants.BE_MEMORY],
5492 instance.hypervisor)
5494 self.LogInfo("Not checking memory on the secondary node as"
5495 " instance will not be started")
5497 # check bridge existence
5498 _CheckInstanceBridgesExist(self, instance, node=target_node)
5500 def Exec(self, feedback_fn):
5501 """Failover an instance.
5503 The failover is done by shutting it down on its present node and
5504 starting it on the secondary.
5507 instance = self.instance
5509 source_node = instance.primary_node
5510 target_node = instance.secondary_nodes[0]
5512 if instance.admin_up:
5513 feedback_fn("* checking disk consistency between source and target")
5514 for dev in instance.disks:
5515 # for drbd, these are drbd over lvm
5516 if not _CheckDiskConsistency(self, dev, target_node, False):
5517 if not self.op.ignore_consistency:
5518 raise errors.OpExecError("Disk %s is degraded on target node,"
5519 " aborting failover." % dev.iv_name)
5521 feedback_fn("* not checking disk consistency as instance is not running")
5523 feedback_fn("* shutting down instance on source node")
5524 logging.info("Shutting down instance %s on node %s",
5525 instance.name, source_node)
5527 result = self.rpc.call_instance_shutdown(source_node, instance,
5528 self.op.shutdown_timeout)
5529 msg = result.fail_msg
5531 if self.op.ignore_consistency:
5532 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5533 " Proceeding anyway. Please make sure node"
5534 " %s is down. Error details: %s",
5535 instance.name, source_node, source_node, msg)
5537 raise errors.OpExecError("Could not shutdown instance %s on"
5539 (instance.name, source_node, msg))
5541 feedback_fn("* deactivating the instance's disks on source node")
5542 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5543 raise errors.OpExecError("Can't shut down the instance's disks.")
5545 instance.primary_node = target_node
5546 # distribute new instance config to the other nodes
5547 self.cfg.Update(instance, feedback_fn)
5549 # Only start the instance if it's marked as up
5550 if instance.admin_up:
5551 feedback_fn("* activating the instance's disks on target node")
5552 logging.info("Starting instance %s on node %s",
5553 instance.name, target_node)
5555 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5556 ignore_secondaries=True)
5558 _ShutdownInstanceDisks(self, instance)
5559 raise errors.OpExecError("Can't activate the instance's disks")
5561 feedback_fn("* starting the instance on the target node")
5562 result = self.rpc.call_instance_start(target_node, instance, None, None)
5563 msg = result.fail_msg
5565 _ShutdownInstanceDisks(self, instance)
5566 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5567 (instance.name, target_node, msg))
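# Illustrative sketch, not part of the module: the failover above is, in
# order:
#
#   1. if the instance is marked up, check disk consistency on the secondary
#      (unless ignore_consistency is set)
#   2. shut the instance down on the old primary and deactivate its disks
#   3. flip instance.primary_node in the configuration and distribute it
#   4. if the instance is marked up, assemble the disks on the new primary
#      and start it there
#
# A failure after step 3 leaves the instance assigned to the new primary,
# which is why the later error paths only shut the disks down instead of
# trying to revert the configuration.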
5570 class LUMigrateInstance(LogicalUnit):
5571 """Migrate an instance.
5573 This is migration without shutting down, compared to the failover,
5574 which is done with shutdown.
5577 HPATH = "instance-migrate"
5578 HTYPE = constants.HTYPE_INSTANCE
5583 ("cleanup", False, _TBool),
5588 def ExpandNames(self):
5589 self._ExpandAndLockInstance()
5591 self.needed_locks[locking.LEVEL_NODE] = []
5592 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5594 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5596 self.tasklets = [self._migrater]
5598 def DeclareLocks(self, level):
5599 if level == locking.LEVEL_NODE:
5600 self._LockInstancesNodes()
5602 def BuildHooksEnv(self):
5605 This runs on master, primary and secondary nodes of the instance.
5608 instance = self._migrater.instance
5609 source_node = instance.primary_node
5610 target_node = instance.secondary_nodes[0]
5611 env = _BuildInstanceHookEnvByObject(self, instance)
5612 env["MIGRATE_LIVE"] = self._migrater.live
5613 env["MIGRATE_CLEANUP"] = self.op.cleanup
5615 "OLD_PRIMARY": source_node,
5616 "OLD_SECONDARY": target_node,
5617 "NEW_PRIMARY": target_node,
5618 "NEW_SECONDARY": source_node,
5620 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5622 nl_post.append(source_node)
5623 return env, nl, nl_post
5626 class LUMoveInstance(LogicalUnit):
5627 """Move an instance by data-copying.
5630 HPATH = "instance-move"
5631 HTYPE = constants.HTYPE_INSTANCE
5634 ("target_node", _NoDefault, _TNonEmptyString),
5639 def ExpandNames(self):
5640 self._ExpandAndLockInstance()
5641 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5642 self.op.target_node = target_node
5643 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5644 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5646 def DeclareLocks(self, level):
5647 if level == locking.LEVEL_NODE:
5648 self._LockInstancesNodes(primary_only=True)
5650 def BuildHooksEnv(self):
5653 This runs on master, primary and secondary nodes of the instance.
5657 "TARGET_NODE": self.op.target_node,
5658 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5660 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5661 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5662 self.op.target_node]
5665 def CheckPrereq(self):
5666 """Check prerequisites.
5668 This checks that the instance is in the cluster.
5671 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5672 assert self.instance is not None, \
5673 "Cannot retrieve locked instance %s" % self.op.instance_name
5675 node = self.cfg.GetNodeInfo(self.op.target_node)
5676 assert node is not None, \
5677 "Cannot retrieve locked node %s" % self.op.target_node
5679 self.target_node = target_node = node.name
5681 if target_node == instance.primary_node:
5682 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5683 (instance.name, target_node),
5686 bep = self.cfg.GetClusterInfo().FillBE(instance)
5688 for idx, dsk in enumerate(instance.disks):
5689 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5690 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5691 " cannot copy" % idx, errors.ECODE_STATE)
5693 _CheckNodeOnline(self, target_node)
5694 _CheckNodeNotDrained(self, target_node)
5696 if instance.admin_up:
5697 # check memory requirements on the secondary node
5698 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5699 instance.name, bep[constants.BE_MEMORY],
5700 instance.hypervisor)
5702 self.LogInfo("Not checking memory on the secondary node as"
5703 " instance will not be started")
5705 # check bridge existence
5706 _CheckInstanceBridgesExist(self, instance, node=target_node)
5708 def Exec(self, feedback_fn):
5709 """Move an instance.
5711 The move is done by shutting it down on its present node, copying
5712 the data over (slow) and starting it on the new node.
5715 instance = self.instance
5717 source_node = instance.primary_node
5718 target_node = self.target_node
5720 self.LogInfo("Shutting down instance %s on source node %s",
5721 instance.name, source_node)
5723 result = self.rpc.call_instance_shutdown(source_node, instance,
5724 self.op.shutdown_timeout)
5725 msg = result.fail_msg
5727 if self.op.ignore_consistency:
5728 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5729 " Proceeding anyway. Please make sure node"
5730 " %s is down. Error details: %s",
5731 instance.name, source_node, source_node, msg)
5733 raise errors.OpExecError("Could not shutdown instance %s on"
5735 (instance.name, source_node, msg))
5737 # create the target disks
5739 _CreateDisks(self, instance, target_node=target_node)
5740 except errors.OpExecError:
5741 self.LogWarning("Device creation failed, reverting...")
5743 _RemoveDisks(self, instance, target_node=target_node)
5745 self.cfg.ReleaseDRBDMinors(instance.name)
5748 cluster_name = self.cfg.GetClusterInfo().cluster_name
5751 # activate, get path, copy the data over
5752 for idx, disk in enumerate(instance.disks):
5753 self.LogInfo("Copying data for disk %d", idx)
5754 result = self.rpc.call_blockdev_assemble(target_node, disk,
5755 instance.name, True)
5757 self.LogWarning("Can't assemble newly created disk %d: %s",
5758 idx, result.fail_msg)
5759 errs.append(result.fail_msg)
5761 dev_path = result.payload
5762 result = self.rpc.call_blockdev_export(source_node, disk,
5763 target_node, dev_path,
5766 self.LogWarning("Can't copy data over for disk %d: %s",
5767 idx, result.fail_msg)
5768 errs.append(result.fail_msg)
5772 self.LogWarning("Some disks failed to copy, aborting")
5774 _RemoveDisks(self, instance, target_node=target_node)
5776 self.cfg.ReleaseDRBDMinors(instance.name)
5777 raise errors.OpExecError("Errors during disk copy: %s" %
5780 instance.primary_node = target_node
5781 self.cfg.Update(instance, feedback_fn)
5783 self.LogInfo("Removing the disks on the original node")
5784 _RemoveDisks(self, instance, target_node=source_node)
5786 # Only start the instance if it's marked as up
5787 if instance.admin_up:
5788 self.LogInfo("Starting instance %s on node %s",
5789 instance.name, target_node)
5791 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5792 ignore_secondaries=True)
5794 _ShutdownInstanceDisks(self, instance)
5795 raise errors.OpExecError("Can't activate the instance's disks")
5797 result = self.rpc.call_instance_start(target_node, instance, None, None)
5798 msg = result.fail_msg
5800 _ShutdownInstanceDisks(self, instance)
5801 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5802 (instance.name, target_node, msg))
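# Illustrative sketch, not part of the module: the per-disk copy loop above
# collects failures in "errs" instead of aborting on the first one, so every
# disk is attempted; only afterwards are the half-created target disks
# removed and the reserved DRBD minors released, roughly:
#
#   if errs:
#     self.LogWarning("Some disks failed to copy, aborting")
#     _RemoveDisks(self, instance, target_node=target_node)
#     self.cfg.ReleaseDRBDMinors(instance.name)
#     raise errors.OpExecError("Errors during disk copy: %s" % ",".join(errs))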
5805 class LUMigrateNode(LogicalUnit):
5806 """Migrate all instances from a node.
5809 HPATH = "node-migrate"
5810 HTYPE = constants.HTYPE_NODE
5818 def ExpandNames(self):
5819 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5821 self.needed_locks = {
5822 locking.LEVEL_NODE: [self.op.node_name],
5825 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5827 # Create tasklets for migrating instances for all instances on this node
5831 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5832 logging.debug("Migrating instance %s", inst.name)
5833 names.append(inst.name)
5835 tasklets.append(TLMigrateInstance(self, inst.name, False))
5837 self.tasklets = tasklets
5839 # Declare instance locks
5840 self.needed_locks[locking.LEVEL_INSTANCE] = names
5842 def DeclareLocks(self, level):
5843 if level == locking.LEVEL_NODE:
5844 self._LockInstancesNodes()
5846 def BuildHooksEnv(self):
5849 This runs on the master, the primary and all the secondaries.
5853 "NODE_NAME": self.op.node_name,
5856 nl = [self.cfg.GetMasterNode()]
5858 return (env, nl, nl)
5861 class TLMigrateInstance(Tasklet):
5862 """Tasklet class for instance migration.
5865 @ivar live: whether the migration will be done live or non-live;
5866 this variable is initialized only after CheckPrereq has run
5869 def __init__(self, lu, instance_name, cleanup):
5870 """Initializes this class.
5873 Tasklet.__init__(self, lu)
5876 self.instance_name = instance_name
5877 self.cleanup = cleanup
5878 self.live = False # will be overridden later
5880 def CheckPrereq(self):
5881 """Check prerequisites.
5883 This checks that the instance is in the cluster.
5886 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5887 instance = self.cfg.GetInstanceInfo(instance_name)
5888 assert instance is not None
5890 if instance.disk_template != constants.DT_DRBD8:
5891 raise errors.OpPrereqError("Instance's disk layout is not"
5892 " drbd8, cannot migrate.", errors.ECODE_STATE)
5894 secondary_nodes = instance.secondary_nodes
5895 if not secondary_nodes:
5896 raise errors.ConfigurationError("No secondary node but using"
5897 " drbd8 disk template")
5899 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5901 target_node = secondary_nodes[0]
5902 # check memory requirements on the secondary node
5903 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5904 instance.name, i_be[constants.BE_MEMORY],
5905 instance.hypervisor)
5907 # check bridge existence
5908 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5910 if not self.cleanup:
5911 _CheckNodeNotDrained(self.lu, target_node)
5912 result = self.rpc.call_instance_migratable(instance.primary_node,
5914 result.Raise("Can't migrate, please use failover",
5915 prereq=True, ecode=errors.ECODE_STATE)
5917 self.instance = instance
5919 if self.lu.op.live is not None and self.lu.op.mode is not None:
5920 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5921 " parameters are accepted",
5923 if self.lu.op.live is not None:
5925 self.lu.op.mode = constants.HT_MIGRATION_LIVE
5927 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5928 # reset the 'live' parameter to None so that repeated
5929 # invocations of CheckPrereq do not raise an exception
5930 self.lu.op.live = None
5931 elif self.lu.op.mode is None:
5932 # read the default value from the hypervisor
5933 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5934 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5936 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
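# Illustrative only (not part of the tasklet): the precedence implemented
# above can be read as a small standalone helper -- an explicit 'live' flag
# wins over 'mode', which wins over the hypervisor default.  All names below
# are hypothetical:
#
#   def resolve_migration_mode(live, mode, hv_default):
#     if live is not None and mode is not None:
#       raise ValueError("only one of 'live' and 'mode' may be given")
#     if live is not None:
#       if live:
#         return "live"
#       return "non-live"
#     if mode is not None:
#       return mode
#     return hv_default
#
#   resolve_migration_mode(True, None, "non-live")   # -> 'live'
#   resolve_migration_mode(None, None, "non-live")   # -> 'non-live'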
5938 def _WaitUntilSync(self):
5939 """Poll with custom rpc for disk sync.
5941 This uses our own step-based rpc call.
5944 self.feedback_fn("* wait until resync is done")
5948 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5950 self.instance.disks)
5952 for node, nres in result.items():
5953 nres.Raise("Cannot resync disks on node %s" % node)
5954 node_done, node_percent = nres.payload
5955 all_done = all_done and node_done
5956 if node_percent is not None:
5957 min_percent = min(min_percent, node_percent)
5959 if min_percent < 100:
5960 self.feedback_fn(" - progress: %.1f%%" % min_percent)
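# Sketch (hypothetical helper, not used by the code above): the polling loop
# reduces the per-node replies to "all nodes done" plus the smallest reported
# completion percentage, which is what gets shown as progress:
#
#   def summarize_sync(results):
#     """results: iterable of (done, percent_or_None), one entry per node."""
#     all_done = True
#     min_percent = 100
#     for done, percent in results:
#       all_done = all_done and done
#       if percent is not None:
#         min_percent = min(min_percent, percent)
#     return all_done, min_percent
#
#   summarize_sync([(False, 42.5), (True, None)])  # -> (False, 42.5)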
5963 def _EnsureSecondary(self, node):
5964 """Demote a node to secondary.
5967 self.feedback_fn("* switching node %s to secondary mode" % node)
5969 for dev in self.instance.disks:
5970 self.cfg.SetDiskID(dev, node)
5972 result = self.rpc.call_blockdev_close(node, self.instance.name,
5973 self.instance.disks)
5974 result.Raise("Cannot change disk to secondary on node %s" % node)
5976 def _GoStandalone(self):
5977 """Disconnect from the network.
5980 self.feedback_fn("* changing into standalone mode")
5981 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5982 self.instance.disks)
5983 for node, nres in result.items():
5984 nres.Raise("Cannot disconnect disks on node %s" % node)
5986 def _GoReconnect(self, multimaster):
5987 """Reconnect to the network.
5993 msg = "single-master"
5994 self.feedback_fn("* changing disks into %s mode" % msg)
5995 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5996 self.instance.disks,
5997 self.instance.name, multimaster)
5998 for node, nres in result.items():
5999 nres.Raise("Cannot change disks config on node %s" % node)
6001 def _ExecCleanup(self):
6002 """Try to cleanup after a failed migration.
6004 The cleanup is done by:
6005 - check that the instance is running only on one node
6006 (and update the config if needed)
6007 - change disks on its secondary node to secondary
6008 - wait until disks are fully synchronized
6009 - disconnect from the network
6010 - change disks into single-master mode
6011 - wait again until disks are fully synchronized
6014 instance = self.instance
6015 target_node = self.target_node
6016 source_node = self.source_node
6018 # check running on only one node
6019 self.feedback_fn("* checking where the instance actually runs"
6020 " (if this hangs, the hypervisor might be in"
6022 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6023 for node, result in ins_l.items():
6024 result.Raise("Can't contact node %s" % node)
6026 runningon_source = instance.name in ins_l[source_node].payload
6027 runningon_target = instance.name in ins_l[target_node].payload
6029 if runningon_source and runningon_target:
6030 raise errors.OpExecError("Instance seems to be running on two nodes,"
6031 " or the hypervisor is confused. You will have"
6032 " to ensure manually that it runs only on one"
6033 " and restart this operation.")
6035 if not (runningon_source or runningon_target):
6036 raise errors.OpExecError("Instance does not seem to be running at all."
6037 " In this case, it's safer to repair by"
6038 " running 'gnt-instance stop' to ensure disk"
6039 " shutdown, and then restarting it.")
6041 if runningon_target:
6042 # the migration has actually succeeded, we need to update the config
6043 self.feedback_fn("* instance running on secondary node (%s),"
6044 " updating config" % target_node)
6045 instance.primary_node = target_node
6046 self.cfg.Update(instance, self.feedback_fn)
6047 demoted_node = source_node
6049 self.feedback_fn("* instance confirmed to be running on its"
6050 " primary node (%s)" % source_node)
6051 demoted_node = target_node
6053 self._EnsureSecondary(demoted_node)
6055 self._WaitUntilSync()
6056 except errors.OpExecError:
6057 # we ignore errors here, since if the device is standalone, it
6058 # won't be able to sync
6060 self._GoStandalone()
6061 self._GoReconnect(False)
6062 self._WaitUntilSync()
6064 self.feedback_fn("* done")
6066 def _RevertDiskStatus(self):
6067 """Try to revert the disk status after a failed migration.
6070 target_node = self.target_node
6072 self._EnsureSecondary(target_node)
6073 self._GoStandalone()
6074 self._GoReconnect(False)
6075 self._WaitUntilSync()
6076 except errors.OpExecError, err:
6077 self.lu.LogWarning("Migration failed and I can't reconnect the"
6078 " drives: error '%s'\n"
6079 "Please look and recover the instance status" %
6082 def _AbortMigration(self):
6083 """Call the hypervisor code to abort a started migration.
6086 instance = self.instance
6087 target_node = self.target_node
6088 migration_info = self.migration_info
6090 abort_result = self.rpc.call_finalize_migration(target_node,
6094 abort_msg = abort_result.fail_msg
6096 logging.error("Aborting migration failed on target node %s: %s",
6097 target_node, abort_msg)
6098 # Don't raise an exception here, as we still have to try to revert the
6099 # disk status, even if this step failed.
6101 def _ExecMigration(self):
6102 """Migrate an instance.
6104 The migration is done by:
6105 - change the disks into dual-master mode
6106 - wait until disks are fully synchronized again
6107 - migrate the instance
6108 - change disks on the new secondary node (the old primary) to secondary
6109 - wait until disks are fully synchronized
6110 - change disks into single-master mode
6113 instance = self.instance
6114 target_node = self.target_node
6115 source_node = self.source_node
6117 self.feedback_fn("* checking disk consistency between source and target")
6118 for dev in instance.disks:
6119 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6120 raise errors.OpExecError("Disk %s is degraded or not fully"
6121 " synchronized on target node,"
6122 " aborting migrate." % dev.iv_name)
6124 # First get the migration information from the remote node
6125 result = self.rpc.call_migration_info(source_node, instance)
6126 msg = result.fail_msg
6128 log_err = ("Failed fetching source migration information from %s: %s" %
6130 logging.error(log_err)
6131 raise errors.OpExecError(log_err)
6133 self.migration_info = migration_info = result.payload
6135 # Then switch the disks to master/master mode
6136 self._EnsureSecondary(target_node)
6137 self._GoStandalone()
6138 self._GoReconnect(True)
6139 self._WaitUntilSync()
6141 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6142 result = self.rpc.call_accept_instance(target_node,
6145 self.nodes_ip[target_node])
6147 msg = result.fail_msg
6149 logging.error("Instance pre-migration failed, trying to revert"
6150 " disk status: %s", msg)
6151 self.feedback_fn("Pre-migration failed, aborting")
6152 self._AbortMigration()
6153 self._RevertDiskStatus()
6154 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6155 (instance.name, msg))
6157 self.feedback_fn("* migrating instance to %s" % target_node)
6159 result = self.rpc.call_instance_migrate(source_node, instance,
6160 self.nodes_ip[target_node],
6162 msg = result.fail_msg
6164 logging.error("Instance migration failed, trying to revert"
6165 " disk status: %s", msg)
6166 self.feedback_fn("Migration failed, aborting")
6167 self._AbortMigration()
6168 self._RevertDiskStatus()
6169 raise errors.OpExecError("Could not migrate instance %s: %s" %
6170 (instance.name, msg))
6173 instance.primary_node = target_node
6174 # distribute new instance config to the other nodes
6175 self.cfg.Update(instance, self.feedback_fn)
6177 result = self.rpc.call_finalize_migration(target_node,
6181 msg = result.fail_msg
6183 logging.error("Instance migration succeeded, but finalization failed:"
6185 raise errors.OpExecError("Could not finalize instance migration: %s" %
6188 self._EnsureSecondary(source_node)
6189 self._WaitUntilSync()
6190 self._GoStandalone()
6191 self._GoReconnect(False)
6192 self._WaitUntilSync()
6194 self.feedback_fn("* done")
6196 def Exec(self, feedback_fn):
6197 """Perform the migration.
6200 feedback_fn("Migrating instance %s" % self.instance.name)
6202 self.feedback_fn = feedback_fn
6204 self.source_node = self.instance.primary_node
6205 self.target_node = self.instance.secondary_nodes[0]
6206 self.all_nodes = [self.source_node, self.target_node]
6208 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6209 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6213 return self._ExecCleanup()
6215 return self._ExecMigration()
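# The step lists documented in TLMigrateInstance._ExecMigration and
# _ExecCleanup above reorder the same DRBD primitives (demote to secondary,
# go standalone, reconnect, wait for sync).  Below is a minimal sketch of the
# migration-time ordering with the primitives passed in as plain callables,
# so it stays independent of the RPC layer; every name here is illustrative
# only and not part of the real code.
def _SketchMigrationDiskSequence(ensure_secondary, go_standalone, go_reconnect,
                                 wait_until_sync, migrate_instance,
                                 source_node, target_node):
  """Order the DRBD state transitions around a live migration (sketch)."""
  # switch the disks to dual-master so both nodes may access them
  ensure_secondary(target_node)
  go_standalone()
  go_reconnect(True)
  wait_until_sync()
  # the hypervisor-level migration happens while disks are dual-master
  migrate_instance()
  # demote the old primary and return to single-master mode
  ensure_secondary(source_node)
  wait_until_sync()
  go_standalone()
  go_reconnect(False)
  wait_until_sync()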
6218 def _CreateBlockDev(lu, node, instance, device, force_create,
6220 """Create a tree of block devices on a given node.
6222 If this device type has to be created on secondaries, create it and
6225 If not, just recurse to children keeping the same 'force' value.
6227 @param lu: the lu on whose behalf we execute
6228 @param node: the node on which to create the device
6229 @type instance: L{objects.Instance}
6230 @param instance: the instance which owns the device
6231 @type device: L{objects.Disk}
6232 @param device: the device to create
6233 @type force_create: boolean
6234 @param force_create: whether to force creation of this device; this
6235 will be changed to True whenever we find a device which has
6236 CreateOnSecondary() attribute
6237 @param info: the extra 'metadata' we should attach to the device
6238 (this will be represented as a LVM tag)
6239 @type force_open: boolean
6240 @param force_open: this parameter will be passed to the
6241 L{backend.BlockdevCreate} function where it specifies
6242 whether we run on primary or not, and it affects both
6243 the child assembly and the device's own Open() execution
6246 if device.CreateOnSecondary():
6250 for child in device.children:
6251 _CreateBlockDev(lu, node, instance, child, force_create,
6254 if not force_create:
6257 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
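# A compact sketch of the recursion rule used by _CreateBlockDev above:
# force_create is promoted to True as soon as a device reports
# CreateOnSecondary(), children are always visited first, and the device
# itself is only created when the (possibly promoted) flag is set.  'dev' is
# any object with a CreateOnSecondary() method and an optional 'children'
# list; 'create' stands in for _CreateSingleBlockDev.  Illustration only.
def _SketchCreateTree(dev, create, force_create=False):
  """Walk a device tree bottom-up, honouring the force-promotion rule."""
  if dev.CreateOnSecondary():
    force_create = True
  for child in getattr(dev, "children", None) or []:
    _SketchCreateTree(child, create, force_create)
  if force_create:
    create(dev)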
6260 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6261 """Create a single block device on a given node.
6263 This will not recurse over children of the device, so they must be
6266 @param lu: the lu on whose behalf we execute
6267 @param node: the node on which to create the device
6268 @type instance: L{objects.Instance}
6269 @param instance: the instance which owns the device
6270 @type device: L{objects.Disk}
6271 @param device: the device to create
6272 @param info: the extra 'metadata' we should attach to the device
6273 (this will be represented as a LVM tag)
6274 @type force_open: boolean
6275 @param force_open: this parameter will be passed to the
6276 L{backend.BlockdevCreate} function where it specifies
6277 whether we run on primary or not, and it affects both
6278 the child assembly and the device's own Open() execution
6281 lu.cfg.SetDiskID(device, node)
6282 result = lu.rpc.call_blockdev_create(node, device, device.size,
6283 instance.name, force_open, info)
6284 result.Raise("Can't create block device %s on"
6285 " node %s for instance %s" % (device, node, instance.name))
6286 if device.physical_id is None:
6287 device.physical_id = result.payload
6290 def _GenerateUniqueNames(lu, exts):
6291 """Generate a suitable LV name.
6293 This will generate a logical volume name for the given instance.
6298 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6299 results.append("%s%s" % (new_id, val))
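# _GenerateUniqueNames above pairs one cluster-unique ID with every requested
# suffix.  Outside the config layer the same idea can be sketched with the
# standard uuid module (hypothetical helper, illustration only):
import uuid

def _SketchUniqueNames(suffixes):
  """Return one '<unique id><suffix>' name per requested suffix."""
  return ["%s%s" % (uuid.uuid4(), suffix) for suffix in suffixes]

# _SketchUniqueNames([".disk0", ".disk1"]) would return something like
#   ['2f1c...-....disk0', '8ab0...-....disk1'] (a fresh ID per suffix)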
6303 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6305 """Generate a drbd8 device complete with its children.
6308 port = lu.cfg.AllocatePort()
6309 vgname = lu.cfg.GetVGName()
6310 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6311 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6312 logical_id=(vgname, names[0]))
6313 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6314 logical_id=(vgname, names[1]))
6315 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6316 logical_id=(primary, secondary, port,
6319 children=[dev_data, dev_meta],
6324 def _GenerateDiskTemplate(lu, template_name,
6325 instance_name, primary_node,
6326 secondary_nodes, disk_info,
6327 file_storage_dir, file_driver,
6329 """Generate the entire disk layout for a given template type.
6332 # TODO: compute space requirements
6334 vgname = lu.cfg.GetVGName()
6335 disk_count = len(disk_info)
6337 if template_name == constants.DT_DISKLESS:
6339 elif template_name == constants.DT_PLAIN:
6340 if len(secondary_nodes) != 0:
6341 raise errors.ProgrammerError("Wrong template configuration")
6343 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6344 for i in range(disk_count)])
6345 for idx, disk in enumerate(disk_info):
6346 disk_index = idx + base_index
6347 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6348 logical_id=(vgname, names[idx]),
6349 iv_name="disk/%d" % disk_index,
6351 disks.append(disk_dev)
6352 elif template_name == constants.DT_DRBD8:
6353 if len(secondary_nodes) != 1:
6354 raise errors.ProgrammerError("Wrong template configuration")
6355 remote_node = secondary_nodes[0]
6356 minors = lu.cfg.AllocateDRBDMinor(
6357 [primary_node, remote_node] * len(disk_info), instance_name)
6360 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6361 for i in range(disk_count)]):
6362 names.append(lv_prefix + "_data")
6363 names.append(lv_prefix + "_meta")
6364 for idx, disk in enumerate(disk_info):
6365 disk_index = idx + base_index
6366 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6367 disk["size"], names[idx*2:idx*2+2],
6368 "disk/%d" % disk_index,
6369 minors[idx*2], minors[idx*2+1])
6370 disk_dev.mode = disk["mode"]
6371 disks.append(disk_dev)
6372 elif template_name == constants.DT_FILE:
6373 if len(secondary_nodes) != 0:
6374 raise errors.ProgrammerError("Wrong template configuration")
6376 _RequireFileStorage()
6378 for idx, disk in enumerate(disk_info):
6379 disk_index = idx + base_index
6380 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6381 iv_name="disk/%d" % disk_index,
6382 logical_id=(file_driver,
6383 "%s/disk%d" % (file_storage_dir,
6386 disks.append(disk_dev)
6388 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
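# The naming scheme above is easiest to see side by side: a plain template
# gets exactly one LV name per disk, while drbd8 expands every generated
# prefix into a data/meta pair.  Sketch (names and helper are illustrative
# only):
def _SketchLvNamesForTemplate(template, prefixes):
  """Return the flat list of LV names for the 'plain' or 'drbd8' layouts."""
  if template == "plain":
    return list(prefixes)
  if template == "drbd8":
    names = []
    for prefix in prefixes:
      names.append(prefix + "_data")
      names.append(prefix + "_meta")
    return names
  raise ValueError("template %r not covered by this sketch" % template)

# _SketchLvNamesForTemplate("drbd8", ["abc.disk0"]) ->
#   ['abc.disk0_data', 'abc.disk0_meta']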
6392 def _GetInstanceInfoText(instance):
6393 """Compute the text that should be added to the disk's metadata.
6396 return "originstname+%s" % instance.name
6399 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6400 """Create all disks for an instance.
6402 This abstracts away some work from AddInstance.
6404 @type lu: L{LogicalUnit}
6405 @param lu: the logical unit on whose behalf we execute
6406 @type instance: L{objects.Instance}
6407 @param instance: the instance whose disks we should create
6409 @param to_skip: list of indices to skip
6410 @type target_node: string
6411 @param target_node: if passed, overrides the target node for creation
6413 @return: the success of the creation
6416 info = _GetInstanceInfoText(instance)
6417 if target_node is None:
6418 pnode = instance.primary_node
6419 all_nodes = instance.all_nodes
6424 if instance.disk_template == constants.DT_FILE:
6425 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6426 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6428 result.Raise("Failed to create directory '%s' on"
6429 " node %s" % (file_storage_dir, pnode))
6431 # Note: this needs to be kept in sync with adding of disks in
6432 # LUSetInstanceParams
6433 for idx, device in enumerate(instance.disks):
6434 if to_skip and idx in to_skip:
6436 logging.info("Creating volume %s for instance %s",
6437 device.iv_name, instance.name)
6439 for node in all_nodes:
6440 f_create = node == pnode
6441 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6444 def _RemoveDisks(lu, instance, target_node=None):
6445 """Remove all disks for an instance.
6447 This abstracts away some work from `AddInstance()` and
6448 `RemoveInstance()`. Note that in case some of the devices couldn't
6449 be removed, the removal will continue with the other ones (compare
6450 with `_CreateDisks()`).
6452 @type lu: L{LogicalUnit}
6453 @param lu: the logical unit on whose behalf we execute
6454 @type instance: L{objects.Instance}
6455 @param instance: the instance whose disks we should remove
6456 @type target_node: string
6457 @param target_node: used to override the node on which to remove the disks
6459 @return: the success of the removal
6462 logging.info("Removing block devices for instance %s", instance.name)
6465 for device in instance.disks:
6467 edata = [(target_node, device)]
6469 edata = device.ComputeNodeTree(instance.primary_node)
6470 for node, disk in edata:
6471 lu.cfg.SetDiskID(disk, node)
6472 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6474 lu.LogWarning("Could not remove block device %s on node %s,"
6475 " continuing anyway: %s", device.iv_name, node, msg)
6478 if instance.disk_template == constants.DT_FILE:
6479 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6483 tgt = instance.primary_node
6484 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6486 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6487 file_storage_dir, instance.primary_node, result.fail_msg)
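# _RemoveDisks above is deliberately best-effort: every failure is only
# logged and removal continues, so one stuck device does not leave the
# remaining ones behind.  The core pattern, with 'remove_fn' standing in for
# the blockdev-removal RPC (illustration only):
def _SketchBestEffortRemove(devices, remove_fn, log_warning):
  """Try to remove every device; report overall success, never bail early."""
  all_ok = True
  for dev in devices:
    err = remove_fn(dev)  # returns an error message, or None on success
    if err:
      log_warning("could not remove %s, continuing anyway: %s" % (dev, err))
      all_ok = False
  return all_ok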
6493 def _ComputeDiskSize(disk_template, disks):
6494 """Compute disk size requirements in the volume group
6497 # Required free disk space as a function of disk and swap space
6499 constants.DT_DISKLESS: None,
6500 constants.DT_PLAIN: sum(d["size"] for d in disks),
6501 # 128 MB are added for drbd metadata for each disk
6502 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6503 constants.DT_FILE: None,
6506 if disk_template not in req_size_dict:
6507 raise errors.ProgrammerError("Disk template '%s' size requirement"
6508 " is unknown" % disk_template)
6510 return req_size_dict[disk_template]
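# Worked example for the table above: two drbd8 disks of 10240 and 2048 MiB
# need 10240 + 128 + 2048 + 128 = 12544 MiB in the volume group, the same
# disks as 'plain' LVs need 12288 MiB, and diskless/file-based instances
# need no VG space at all:
#
#   _ComputeDiskSize(constants.DT_DRBD8,
#                    [{"size": 10240}, {"size": 2048}])   # -> 12544
#   _ComputeDiskSize(constants.DT_PLAIN,
#                    [{"size": 10240}, {"size": 2048}])   # -> 12288
#   _ComputeDiskSize(constants.DT_FILE, [{"size": 10240}])  # -> None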
6513 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6514 """Hypervisor parameter validation.
6516 This function abstracts the hypervisor parameter validation to be
6517 used in both instance create and instance modify.
6519 @type lu: L{LogicalUnit}
6520 @param lu: the logical unit for which we check
6521 @type nodenames: list
6522 @param nodenames: the list of nodes on which we should check
6523 @type hvname: string
6524 @param hvname: the name of the hypervisor we should use
6525 @type hvparams: dict
6526 @param hvparams: the parameters which we need to check
6527 @raise errors.OpPrereqError: if the parameters are not valid
6530 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6533 for node in nodenames:
6537 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6540 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6541 """OS parameters validation.
6543 @type lu: L{LogicalUnit}
6544 @param lu: the logical unit for which we check
6545 @type required: boolean
6546 @param required: whether the validation should fail if the OS is not
6548 @type nodenames: list
6549 @param nodenames: the list of nodes on which we should check
6550 @type osname: string
6551 @param osname: the name of the OS we should use
6552 @type osparams: dict
6553 @param osparams: the parameters which we need to check
6554 @raise errors.OpPrereqError: if the parameters are not valid
6557 result = lu.rpc.call_os_validate(required, nodenames, osname,
6558 [constants.OS_VALIDATE_PARAMETERS],
6560 for node, nres in result.items():
6561 # we don't check for offline cases since this should be run only
6562 # against the master node and/or an instance's nodes
6563 nres.Raise("OS Parameters validation failed on node %s" % node)
6564 if not nres.payload:
6565 lu.LogInfo("OS %s not found on node %s, validation skipped",
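# The loop above implements a "validate where possible" policy: a node that
# does not even have the OS only produces an informational message, while a
# node that has it but rejects the parameters fails the whole check.  As a
# sketch, with 'check_fn' returning (os_found, error_or_None) per node (all
# names are illustrative only):
def _SketchValidateOsOnNodes(nodes, check_fn, log_info):
  """Raise on real validation errors, merely log nodes missing the OS."""
  for node in nodes:
    (found, err) = check_fn(node)
    if err:
      raise RuntimeError("OS parameter validation failed on node %s: %s" %
                         (node, err))
    if not found:
      log_info("OS not found on node %s, validation skipped" % node)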
6569 class LUCreateInstance(LogicalUnit):
6570 """Create an instance.
6573 HPATH = "instance-add"
6574 HTYPE = constants.HTYPE_INSTANCE
6577 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6578 ("start", True, _TBool),
6579 ("wait_for_sync", True, _TBool),
6580 ("ip_check", True, _TBool),
6581 ("name_check", True, _TBool),
6582 ("disks", _NoDefault, _TListOf(_TDict)),
6583 ("nics", _NoDefault, _TListOf(_TDict)),
6584 ("hvparams", _EmptyDict, _TDict),
6585 ("beparams", _EmptyDict, _TDict),
6586 ("osparams", _EmptyDict, _TDict),
6587 ("no_install", None, _TMaybeBool),
6588 ("os_type", None, _TMaybeString),
6589 ("force_variant", False, _TBool),
6590 ("source_handshake", None, _TOr(_TList, _TNone)),
6591 ("source_x509_ca", None, _TMaybeString),
6592 ("source_instance_name", None, _TMaybeString),
6593 ("src_node", None, _TMaybeString),
6594 ("src_path", None, _TMaybeString),
6595 ("pnode", None, _TMaybeString),
6596 ("snode", None, _TMaybeString),
6597 ("iallocator", None, _TMaybeString),
6598 ("hypervisor", None, _TMaybeString),
6599 ("disk_template", _NoDefault, _CheckDiskTemplate),
6600 ("identify_defaults", False, _TBool),
6601 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6602 ("file_storage_dir", None, _TMaybeString),
6606 def CheckArguments(self):
6610 # do not require name_check to ease forward/backward compatibility
6612 if self.op.no_install and self.op.start:
6613 self.LogInfo("No-installation mode selected, disabling startup")
6614 self.op.start = False
6615 # validate/normalize the instance name
6616 self.op.instance_name = \
6617 netutils.HostInfo.NormalizeName(self.op.instance_name)
6619 if self.op.ip_check and not self.op.name_check:
6620 # TODO: make the ip check more flexible and not depend on the name check
6621 raise errors.OpPrereqError("Cannot do ip check without a name check",
6624 # check nics' parameter names
6625 for nic in self.op.nics:
6626 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6628 # check disks. parameter names and consistent adopt/no-adopt strategy
6629 has_adopt = has_no_adopt = False
6630 for disk in self.op.disks:
6631 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6636 if has_adopt and has_no_adopt:
6637 raise errors.OpPrereqError("Either all disks are adopted or none is",
6640 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6641 raise errors.OpPrereqError("Disk adoption is not supported for the"
6642 " '%s' disk template" %
6643 self.op.disk_template,
6645 if self.op.iallocator is not None:
6646 raise errors.OpPrereqError("Disk adoption not allowed with an"
6647 " iallocator script", errors.ECODE_INVAL)
6648 if self.op.mode == constants.INSTANCE_IMPORT:
6649 raise errors.OpPrereqError("Disk adoption not allowed for"
6650 " instance import", errors.ECODE_INVAL)
6652 self.adopt_disks = has_adopt
6654 # instance name verification
6655 if self.op.name_check:
6656 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6657 self.op.instance_name = self.hostname1.name
6658 # used in CheckPrereq for ip ping check
6659 self.check_ip = self.hostname1.ip
6661 self.check_ip = None
6663 # file storage checks
6664 if (self.op.file_driver and
6665 not self.op.file_driver in constants.FILE_DRIVER):
6666 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6667 self.op.file_driver, errors.ECODE_INVAL)
6669 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6670 raise errors.OpPrereqError("File storage directory path not absolute",
6673 ### Node/iallocator related checks
6674 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6676 if self.op.pnode is not None:
6677 if self.op.disk_template in constants.DTS_NET_MIRROR:
6678 if self.op.snode is None:
6679 raise errors.OpPrereqError("The networked disk templates need"
6680 " a mirror node", errors.ECODE_INVAL)
6682 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6684 self.op.snode = None
6686 self._cds = _GetClusterDomainSecret()
6688 if self.op.mode == constants.INSTANCE_IMPORT:
6689 # On import force_variant must be True, because if we forced it at
6690 # initial install, our only chance when importing it back is that it
6692 self.op.force_variant = True
6694 if self.op.no_install:
6695 self.LogInfo("No-installation mode has no effect during import")
6697 elif self.op.mode == constants.INSTANCE_CREATE:
6698 if self.op.os_type is None:
6699 raise errors.OpPrereqError("No guest OS specified",
6701 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6702 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6703 " installation" % self.op.os_type,
6705 if self.op.disk_template is None:
6706 raise errors.OpPrereqError("No disk template specified",
6709 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6710 # Check handshake to ensure both clusters have the same domain secret
6711 src_handshake = self.op.source_handshake
6712 if not src_handshake:
6713 raise errors.OpPrereqError("Missing source handshake",
6716 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6719 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6722 # Load and check source CA
6723 self.source_x509_ca_pem = self.op.source_x509_ca
6724 if not self.source_x509_ca_pem:
6725 raise errors.OpPrereqError("Missing source X509 CA",
6729 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6731 except OpenSSL.crypto.Error, err:
6732 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6733 (err, ), errors.ECODE_INVAL)
6735 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6736 if errcode is not None:
6737 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6740 self.source_x509_ca = cert
6742 src_instance_name = self.op.source_instance_name
6743 if not src_instance_name:
6744 raise errors.OpPrereqError("Missing source instance name",
6747 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6748 self.source_instance_name = netutils.GetHostInfo(norm_name).name
6751 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6752 self.op.mode, errors.ECODE_INVAL)
6754 def ExpandNames(self):
6755 """ExpandNames for CreateInstance.
6757 Figure out the right locks for instance creation.
6760 self.needed_locks = {}
6762 instance_name = self.op.instance_name
6763 # this is just a preventive check, but someone might still add this
6764 # instance in the meantime, and creation will fail at lock-add time
6765 if instance_name in self.cfg.GetInstanceList():
6766 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6767 instance_name, errors.ECODE_EXISTS)
6769 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6771 if self.op.iallocator:
6772 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6774 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6775 nodelist = [self.op.pnode]
6776 if self.op.snode is not None:
6777 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6778 nodelist.append(self.op.snode)
6779 self.needed_locks[locking.LEVEL_NODE] = nodelist
6781 # in case of import lock the source node too
6782 if self.op.mode == constants.INSTANCE_IMPORT:
6783 src_node = self.op.src_node
6784 src_path = self.op.src_path
6786 if src_path is None:
6787 self.op.src_path = src_path = self.op.instance_name
6789 if src_node is None:
6790 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6791 self.op.src_node = None
6792 if os.path.isabs(src_path):
6793 raise errors.OpPrereqError("Importing an instance from an absolute"
6794 " path requires a source node option.",
6797 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6798 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6799 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6800 if not os.path.isabs(src_path):
6801 self.op.src_path = src_path = \
6802 utils.PathJoin(constants.EXPORT_DIR, src_path)
6804 def _RunAllocator(self):
6805 """Run the allocator based on input opcode.
6808 nics = [n.ToDict() for n in self.nics]
6809 ial = IAllocator(self.cfg, self.rpc,
6810 mode=constants.IALLOCATOR_MODE_ALLOC,
6811 name=self.op.instance_name,
6812 disk_template=self.op.disk_template,
6815 vcpus=self.be_full[constants.BE_VCPUS],
6816 mem_size=self.be_full[constants.BE_MEMORY],
6819 hypervisor=self.op.hypervisor,
6822 ial.Run(self.op.iallocator)
6825 raise errors.OpPrereqError("Can't compute nodes using"
6826 " iallocator '%s': %s" %
6827 (self.op.iallocator, ial.info),
6829 if len(ial.result) != ial.required_nodes:
6830 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6831 " of nodes (%s), required %s" %
6832 (self.op.iallocator, len(ial.result),
6833 ial.required_nodes), errors.ECODE_FAULT)
6834 self.op.pnode = ial.result[0]
6835 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6836 self.op.instance_name, self.op.iallocator,
6837 utils.CommaJoin(ial.result))
6838 if ial.required_nodes == 2:
6839 self.op.snode = ial.result[1]
6841 def BuildHooksEnv(self):
6844 This runs on master, primary and secondary nodes of the instance.
6848 "ADD_MODE": self.op.mode,
6850 if self.op.mode == constants.INSTANCE_IMPORT:
6851 env["SRC_NODE"] = self.op.src_node
6852 env["SRC_PATH"] = self.op.src_path
6853 env["SRC_IMAGES"] = self.src_images
6855 env.update(_BuildInstanceHookEnv(
6856 name=self.op.instance_name,
6857 primary_node=self.op.pnode,
6858 secondary_nodes=self.secondaries,
6859 status=self.op.start,
6860 os_type=self.op.os_type,
6861 memory=self.be_full[constants.BE_MEMORY],
6862 vcpus=self.be_full[constants.BE_VCPUS],
6863 nics=_NICListToTuple(self, self.nics),
6864 disk_template=self.op.disk_template,
6865 disks=[(d["size"], d["mode"]) for d in self.disks],
6868 hypervisor_name=self.op.hypervisor,
6871 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6875 def _ReadExportInfo(self):
6876 """Reads the export information from disk.
6878 It will override the opcode source node and path with the actual
6879 information, if these two were not specified before.
6881 @return: the export information
6884 assert self.op.mode == constants.INSTANCE_IMPORT
6886 src_node = self.op.src_node
6887 src_path = self.op.src_path
6889 if src_node is None:
6890 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6891 exp_list = self.rpc.call_export_list(locked_nodes)
6893 for node in exp_list:
6894 if exp_list[node].fail_msg:
6896 if src_path in exp_list[node].payload:
6898 self.op.src_node = src_node = node
6899 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6903 raise errors.OpPrereqError("No export found for relative path %s" %
6904 src_path, errors.ECODE_INVAL)
6906 _CheckNodeOnline(self, src_node)
6907 result = self.rpc.call_export_info(src_node, src_path)
6908 result.Raise("No export or invalid export found in dir %s" % src_path)
6910 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6911 if not export_info.has_section(constants.INISECT_EXP):
6912 raise errors.ProgrammerError("Corrupted export config",
6913 errors.ECODE_ENVIRON)
6915 ei_version = export_info.get(constants.INISECT_EXP, "version")
6916 if (int(ei_version) != constants.EXPORT_VERSION):
6917 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6918 (ei_version, constants.EXPORT_VERSION),
6919 errors.ECODE_ENVIRON)
6922 def _ReadExportParams(self, einfo):
6923 """Use export parameters as defaults.
6925 In case the opcode doesn't specify (as in override) some instance
6926 parameters, then try to use them from the export information, if
6930 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6932 if self.op.disk_template is None:
6933 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6934 self.op.disk_template = einfo.get(constants.INISECT_INS,
6937 raise errors.OpPrereqError("No disk template specified and the export"
6938 " is missing the disk_template information",
6941 if not self.op.disks:
6942 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6944 # TODO: import the disk iv_name too
6945 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6946 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6947 disks.append({"size": disk_sz})
6948 self.op.disks = disks
6950 raise errors.OpPrereqError("No disk info specified and the export"
6951 " is missing the disk information",
6954 if (not self.op.nics and
6955 einfo.has_option(constants.INISECT_INS, "nic_count")):
6957 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6959 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6960 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6965 if (self.op.hypervisor is None and
6966 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6967 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6968 if einfo.has_section(constants.INISECT_HYP):
6969 # use the export parameters but do not override the ones
6970 # specified by the user
6971 for name, value in einfo.items(constants.INISECT_HYP):
6972 if name not in self.op.hvparams:
6973 self.op.hvparams[name] = value
6975 if einfo.has_section(constants.INISECT_BEP):
6976 # use the parameters, without overriding
6977 for name, value in einfo.items(constants.INISECT_BEP):
6978 if name not in self.op.beparams:
6979 self.op.beparams[name] = value
6981 # try to read the parameters old style, from the main section
6982 for name in constants.BES_PARAMETERS:
6983 if (name not in self.op.beparams and
6984 einfo.has_option(constants.INISECT_INS, name)):
6985 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6987 if einfo.has_section(constants.INISECT_OSP):
6988 # use the parameters, without overriding
6989 for name, value in einfo.items(constants.INISECT_OSP):
6990 if name not in self.op.osparams:
6991 self.op.osparams[name] = value
6993 def _RevertToDefaults(self, cluster):
6994 """Revert the instance parameters to the default values.
6998 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6999 for name in self.op.hvparams.keys():
7000 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7001 del self.op.hvparams[name]
7003 be_defs = cluster.SimpleFillBE({})
7004 for name in self.op.beparams.keys():
7005 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7006 del self.op.beparams[name]
7008 nic_defs = cluster.SimpleFillNIC({})
7009 for nic in self.op.nics:
7010 for name in constants.NICS_PARAMETERS:
7011 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7014 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7015 for name in self.op.osparams.keys():
7016 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7017 del self.op.osparams[name]
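# Sketch of the pruning rule applied four times above (hvparams, beparams,
# per-NIC parameters, osparams): drop every explicitly-given value that is
# identical to the cluster default, so only real overrides end up stored in
# the instance.  Hypothetical standalone form, with made-up parameter names:
#
#   def prune_to_overrides(given, defaults):
#     return dict((k, v) for k, v in given.items()
#                 if not (k in defaults and defaults[k] == v))
#
#   prune_to_overrides({"kernel_path": "/boot/vmlinuz", "acpi": True},
#                      {"kernel_path": "/boot/vmlinuz", "acpi": False})
#   # -> {"acpi": True}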
7019 def CheckPrereq(self):
7020 """Check prerequisites.
7023 if self.op.mode == constants.INSTANCE_IMPORT:
7024 export_info = self._ReadExportInfo()
7025 self._ReadExportParams(export_info)
7027 _CheckDiskTemplate(self.op.disk_template)
7029 if (not self.cfg.GetVGName() and
7030 self.op.disk_template not in constants.DTS_NOT_LVM):
7031 raise errors.OpPrereqError("Cluster does not support lvm-based"
7032 " instances", errors.ECODE_STATE)
7034 if self.op.hypervisor is None:
7035 self.op.hypervisor = self.cfg.GetHypervisorType()
7037 cluster = self.cfg.GetClusterInfo()
7038 enabled_hvs = cluster.enabled_hypervisors
7039 if self.op.hypervisor not in enabled_hvs:
7040 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7041 " cluster (%s)" % (self.op.hypervisor,
7042 ",".join(enabled_hvs)),
7045 # check hypervisor parameter syntax (locally)
7046 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7047 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7049 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7050 hv_type.CheckParameterSyntax(filled_hvp)
7051 self.hv_full = filled_hvp
7052 # check that we don't specify global parameters on an instance
7053 _CheckGlobalHvParams(self.op.hvparams)
7055 # fill and remember the beparams dict
7056 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7057 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7059 # build os parameters
7060 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7062 # now that hvp/bep are in final format, let's reset to defaults,
7064 if self.op.identify_defaults:
7065 self._RevertToDefaults(cluster)
7069 for idx, nic in enumerate(self.op.nics):
7070 nic_mode_req = nic.get("mode", None)
7071 nic_mode = nic_mode_req
7072 if nic_mode is None:
7073 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7075 # in routed mode, for the first nic, the default ip is 'auto'
7076 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7077 default_ip_mode = constants.VALUE_AUTO
7079 default_ip_mode = constants.VALUE_NONE
7081 # ip validity checks
7082 ip = nic.get("ip", default_ip_mode)
7083 if ip is None or ip.lower() == constants.VALUE_NONE:
7085 elif ip.lower() == constants.VALUE_AUTO:
7086 if not self.op.name_check:
7087 raise errors.OpPrereqError("IP address set to auto but name checks"
7088 " have been skipped. Aborting.",
7090 nic_ip = self.hostname1.ip
7092 if not netutils.IsValidIP4(ip):
7093 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7094 " like a valid IP" % ip,
7098 # TODO: check the ip address for uniqueness
7099 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7100 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7103 # MAC address verification
7104 mac = nic.get("mac", constants.VALUE_AUTO)
7105 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7106 mac = utils.NormalizeAndValidateMac(mac)
7109 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7110 except errors.ReservationError:
7111 raise errors.OpPrereqError("MAC address %s already in use"
7112 " in cluster" % mac,
7113 errors.ECODE_NOTUNIQUE)
7115 # bridge verification
7116 bridge = nic.get("bridge", None)
7117 link = nic.get("link", None)
7119 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7120 " at the same time", errors.ECODE_INVAL)
7121 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7122 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7129 nicparams[constants.NIC_MODE] = nic_mode_req
7131 nicparams[constants.NIC_LINK] = link
7133 check_params = cluster.SimpleFillNIC(nicparams)
7134 objects.NIC.CheckParameterSyntax(check_params)
7135 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7137 # disk checks/pre-build
7139 for disk in self.op.disks:
7140 mode = disk.get("mode", constants.DISK_RDWR)
7141 if mode not in constants.DISK_ACCESS_SET:
7142 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7143 mode, errors.ECODE_INVAL)
7144 size = disk.get("size", None)
7146 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7149 except (TypeError, ValueError):
7150 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7152 new_disk = {"size": size, "mode": mode}
7154 new_disk["adopt"] = disk["adopt"]
7155 self.disks.append(new_disk)
7157 if self.op.mode == constants.INSTANCE_IMPORT:
7159 # Check that the new instance doesn't have less disks than the export
7160 instance_disks = len(self.disks)
7161 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7162 if instance_disks < export_disks:
7163 raise errors.OpPrereqError("Not enough disks to import."
7164 " (instance: %d, export: %d)" %
7165 (instance_disks, export_disks),
7169 for idx in range(export_disks):
7170 option = 'disk%d_dump' % idx
7171 if export_info.has_option(constants.INISECT_INS, option):
7172 # FIXME: are the old os-es, disk sizes, etc. useful?
7173 export_name = export_info.get(constants.INISECT_INS, option)
7174 image = utils.PathJoin(self.op.src_path, export_name)
7175 disk_images.append(image)
7177 disk_images.append(False)
7179 self.src_images = disk_images
7181 old_name = export_info.get(constants.INISECT_INS, 'name')
7183 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7184 except (TypeError, ValueError), err:
7185 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7186 " an integer: %s" % str(err),
7188 if self.op.instance_name == old_name:
7189 for idx, nic in enumerate(self.nics):
7190 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7191 nic_mac_ini = 'nic%d_mac' % idx
7192 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7194 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7196 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7197 if self.op.ip_check:
7198 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7199 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7200 (self.check_ip, self.op.instance_name),
7201 errors.ECODE_NOTUNIQUE)
7203 #### mac address generation
7204 # By generating the mac address here, both the allocator and the hooks get
7205 # the real final mac address rather than the 'auto' or 'generate' value.
7206 # There is a race condition between the generation and the instance object
7207 # creation, which means that we know the mac is valid now, but we're not
7208 # sure it will be when we actually add the instance. If things go bad
7209 # adding the instance will abort because of a duplicate mac, and the
7210 # creation job will fail.
7211 for nic in self.nics:
7212 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7213 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7217 if self.op.iallocator is not None:
7218 self._RunAllocator()
7220 #### node related checks
7222 # check primary node
7223 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7224 assert self.pnode is not None, \
7225 "Cannot retrieve locked node %s" % self.op.pnode
7227 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7228 pnode.name, errors.ECODE_STATE)
7230 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7231 pnode.name, errors.ECODE_STATE)
7233 self.secondaries = []
7235 # mirror node verification
7236 if self.op.disk_template in constants.DTS_NET_MIRROR:
7237 if self.op.snode == pnode.name:
7238 raise errors.OpPrereqError("The secondary node cannot be the"
7239 " primary node.", errors.ECODE_INVAL)
7240 _CheckNodeOnline(self, self.op.snode)
7241 _CheckNodeNotDrained(self, self.op.snode)
7242 self.secondaries.append(self.op.snode)
7244 nodenames = [pnode.name] + self.secondaries
7246 req_size = _ComputeDiskSize(self.op.disk_template,
7249 # Check lv size requirements, if not adopting
7250 if req_size is not None and not self.adopt_disks:
7251 _CheckNodesFreeDisk(self, nodenames, req_size)
7253 if self.adopt_disks: # instead, we must check the adoption data
7254 all_lvs = set([i["adopt"] for i in self.disks])
7255 if len(all_lvs) != len(self.disks):
7256 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7258 for lv_name in all_lvs:
7260 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7261 except errors.ReservationError:
7262 raise errors.OpPrereqError("LV named %s used by another instance" %
7263 lv_name, errors.ECODE_NOTUNIQUE)
7265 node_lvs = self.rpc.call_lv_list([pnode.name],
7266 self.cfg.GetVGName())[pnode.name]
7267 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7268 node_lvs = node_lvs.payload
7269 delta = all_lvs.difference(node_lvs.keys())
7271 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7272 utils.CommaJoin(delta),
7274 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7276 raise errors.OpPrereqError("Online logical volumes found, cannot"
7277 " adopt: %s" % utils.CommaJoin(online_lvs),
7279 # update the size of disk based on what is found
7280 for dsk in self.disks:
7281 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
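# Sketch of the adoption checks performed just above, reduced to the rules
# they enforce; 'node_lvs' maps an LV name to (size, _, online) as returned
# by the LV listing, and all names here are illustrative only:
#
#   def check_adoptable(wanted_lvs, node_lvs):
#     missing = [lv for lv in wanted_lvs if lv not in node_lvs]
#     if missing:
#       raise ValueError("missing logical volume(s): %s" % missing)
#     online = [lv for lv in wanted_lvs if node_lvs[lv][2]]
#     if online:
#       raise ValueError("online logical volumes found: %s" % online)
#     # adopt the sizes actually reported by the node
#     return dict((lv, int(float(node_lvs[lv][0]))) for lv in wanted_lvs)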
7283 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7285 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7286 # check OS parameters (remotely)
7287 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7289 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7291 # memory check on primary node
7293 _CheckNodeFreeMemory(self, self.pnode.name,
7294 "creating instance %s" % self.op.instance_name,
7295 self.be_full[constants.BE_MEMORY],
7298 self.dry_run_result = list(nodenames)
7300 def Exec(self, feedback_fn):
7301 """Create and add the instance to the cluster.
7304 instance = self.op.instance_name
7305 pnode_name = self.pnode.name
7307 ht_kind = self.op.hypervisor
7308 if ht_kind in constants.HTS_REQ_PORT:
7309 network_port = self.cfg.AllocatePort()
7313 if constants.ENABLE_FILE_STORAGE:
7314 # this is needed because os.path.join does not accept None arguments
7315 if self.op.file_storage_dir is None:
7316 string_file_storage_dir = ""
7318 string_file_storage_dir = self.op.file_storage_dir
7320 # build the full file storage dir path
7321 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7322 string_file_storage_dir, instance)
7324 file_storage_dir = ""
7326 disks = _GenerateDiskTemplate(self,
7327 self.op.disk_template,
7328 instance, pnode_name,
7332 self.op.file_driver,
7335 iobj = objects.Instance(name=instance, os=self.op.os_type,
7336 primary_node=pnode_name,
7337 nics=self.nics, disks=disks,
7338 disk_template=self.op.disk_template,
7340 network_port=network_port,
7341 beparams=self.op.beparams,
7342 hvparams=self.op.hvparams,
7343 hypervisor=self.op.hypervisor,
7344 osparams=self.op.osparams,
7347 if self.adopt_disks:
7348 # rename LVs to the newly-generated names; we need to construct
7349 # 'fake' LV disks with the old data, plus the new unique_id
7350 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7352 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7353 rename_to.append(t_dsk.logical_id)
7354 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7355 self.cfg.SetDiskID(t_dsk, pnode_name)
7356 result = self.rpc.call_blockdev_rename(pnode_name,
7357 zip(tmp_disks, rename_to))
7358 result.Raise("Failed to rename adopted LVs")
7360 feedback_fn("* creating instance disks...")
7362 _CreateDisks(self, iobj)
7363 except errors.OpExecError:
7364 self.LogWarning("Device creation failed, reverting...")
7366 _RemoveDisks(self, iobj)
7368 self.cfg.ReleaseDRBDMinors(instance)
7371 feedback_fn("adding instance %s to cluster config" % instance)
7373 self.cfg.AddInstance(iobj, self.proc.GetECId())
7375 # Declare that we don't want to remove the instance lock anymore, as we've
7376 # added the instance to the config
7377 del self.remove_locks[locking.LEVEL_INSTANCE]
7378 # Unlock all the nodes
7379 if self.op.mode == constants.INSTANCE_IMPORT:
7380 nodes_keep = [self.op.src_node]
7381 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7382 if node != self.op.src_node]
7383 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7384 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7386 self.context.glm.release(locking.LEVEL_NODE)
7387 del self.acquired_locks[locking.LEVEL_NODE]
7389 if self.op.wait_for_sync:
7390 disk_abort = not _WaitForSync(self, iobj)
7391 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7392 # make sure the disks are not degraded (still sync-ing is ok)
7394 feedback_fn("* checking mirrors status")
7395 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7400 _RemoveDisks(self, iobj)
7401 self.cfg.RemoveInstance(iobj.name)
7402 # Make sure the instance lock gets removed
7403 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7404 raise errors.OpExecError("There are some degraded disks for"
7407 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7408 if self.op.mode == constants.INSTANCE_CREATE:
7409 if not self.op.no_install:
7410 feedback_fn("* running the instance OS create scripts...")
7411 # FIXME: pass debug option from opcode to backend
7412 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7413 self.op.debug_level)
7414 result.Raise("Could not add os for instance %s"
7415 " on node %s" % (instance, pnode_name))
7417 elif self.op.mode == constants.INSTANCE_IMPORT:
7418 feedback_fn("* running the instance OS import scripts...")
7422 for idx, image in enumerate(self.src_images):
7426 # FIXME: pass debug option from opcode to backend
7427 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7428 constants.IEIO_FILE, (image, ),
7429 constants.IEIO_SCRIPT,
7430 (iobj.disks[idx], idx),
7432 transfers.append(dt)
7435 masterd.instance.TransferInstanceData(self, feedback_fn,
7436 self.op.src_node, pnode_name,
7437 self.pnode.secondary_ip,
7439 if not compat.all(import_result):
7440 self.LogWarning("Some disks for instance %s on node %s were not"
7441 " imported successfully" % (instance, pnode_name))
7443 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7444 feedback_fn("* preparing remote import...")
7445 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7446 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7448 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7449 self.source_x509_ca,
7450 self._cds, timeouts)
7451 if not compat.all(disk_results):
7452 # TODO: Should the instance still be started, even if some disks
7453 # failed to import (valid for local imports, too)?
7454 self.LogWarning("Some disks for instance %s on node %s were not"
7455 " imported successfully" % (instance, pnode_name))
7457 # Run rename script on newly imported instance
7458 assert iobj.name == instance
7459 feedback_fn("Running rename script for %s" % instance)
7460 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7461 self.source_instance_name,
7462 self.op.debug_level)
7464 self.LogWarning("Failed to run rename script for %s on node"
7465 " %s: %s" % (instance, pnode_name, result.fail_msg))
7468 # also checked in the prereq part
7469 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7473 iobj.admin_up = True
7474 self.cfg.Update(iobj, feedback_fn)
7475 logging.info("Starting instance %s on node %s", instance, pnode_name)
7476 feedback_fn("* starting instance...")
7477 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7478 result.Raise("Could not start instance")
7480 return list(iobj.all_nodes)
7483 class LUConnectConsole(NoHooksLU):
7484 """Connect to an instance's console.
7486 This is somewhat special in that it returns the command line that
7487 you need to run on the master node in order to connect to the
7496 def ExpandNames(self):
7497 self._ExpandAndLockInstance()
7499 def CheckPrereq(self):
7500 """Check prerequisites.
7502 This checks that the instance is in the cluster.
7505 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7506 assert self.instance is not None, \
7507 "Cannot retrieve locked instance %s" % self.op.instance_name
7508 _CheckNodeOnline(self, self.instance.primary_node)
7510 def Exec(self, feedback_fn):
7511 """Connect to the console of an instance
7514 instance = self.instance
7515 node = instance.primary_node
7517 node_insts = self.rpc.call_instance_list([node],
7518 [instance.hypervisor])[node]
7519 node_insts.Raise("Can't get node information from %s" % node)
7521 if instance.name not in node_insts.payload:
7522 raise errors.OpExecError("Instance %s is not running." % instance.name)
7524 logging.debug("Connecting to console of %s on %s", instance.name, node)
7526 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7527 cluster = self.cfg.GetClusterInfo()
7528 # beparams and hvparams are passed separately, to avoid editing the
7529 # instance and then saving the defaults in the instance itself.
7530 hvparams = cluster.FillHV(instance)
7531 beparams = cluster.FillBE(instance)
7532 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7535 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
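# LUConnectConsole does not open the console itself: Exec() hands back the
# fully built command line (via self.ssh.BuildCmd above) and the client runs
# it.  Conceptually the result is an argv along these lines -- a sketch only,
# not the real BuildCmd output:
def _SketchConsoleSshArgv(node, console_cmd):
  """Build an illustrative 'ssh -t root@<node> <console command>' argv."""
  return ["ssh", "-t", "root@%s" % node, console_cmd]

# _SketchConsoleSshArgv("node1.example.com", "xm console instance1") ->
#   ['ssh', '-t', 'root@node1.example.com', 'xm console instance1']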
7538 class LUReplaceDisks(LogicalUnit):
7539 """Replace the disks of an instance.
7542 HPATH = "mirrors-replace"
7543 HTYPE = constants.HTYPE_INSTANCE
7546 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7547 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7548 ("remote_node", None, _TMaybeString),
7549 ("iallocator", None, _TMaybeString),
7550 ("early_release", False, _TBool),
7554 def CheckArguments(self):
7555 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7558 def ExpandNames(self):
7559 self._ExpandAndLockInstance()
7561 if self.op.iallocator is not None:
7562 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7564 elif self.op.remote_node is not None:
7565 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7566 self.op.remote_node = remote_node
7568 # Warning: do not remove the locking of the new secondary here
7569 # unless DRBD8.AddChildren is changed to work in parallel;
7570 # currently it doesn't since parallel invocations of
7571 # FindUnusedMinor will conflict
7572 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7573 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7576 self.needed_locks[locking.LEVEL_NODE] = []
7577 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7579 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7580 self.op.iallocator, self.op.remote_node,
7581 self.op.disks, False, self.op.early_release)
7583 self.tasklets = [self.replacer]
7585 def DeclareLocks(self, level):
7586 # If we're not already locking all nodes in the set we have to declare the
7587 # instance's primary/secondary nodes.
7588 if (level == locking.LEVEL_NODE and
7589 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7590 self._LockInstancesNodes()
7592 def BuildHooksEnv(self):
7595 This runs on the master, the primary and all the secondaries.
7598 instance = self.replacer.instance
7600 "MODE": self.op.mode,
7601 "NEW_SECONDARY": self.op.remote_node,
7602 "OLD_SECONDARY": instance.secondary_nodes[0],
7604 env.update(_BuildInstanceHookEnvByObject(self, instance))
7606 self.cfg.GetMasterNode(),
7607 instance.primary_node,
7609 if self.op.remote_node is not None:
7610 nl.append(self.op.remote_node)
7614 class TLReplaceDisks(Tasklet):
7615 """Replaces disks for an instance.
7617 Note: Locking is not within the scope of this class.
7620 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7621 disks, delay_iallocator, early_release):
7622 """Initializes this class.
7625 Tasklet.__init__(self, lu)
7628 self.instance_name = instance_name
7630 self.iallocator_name = iallocator_name
7631 self.remote_node = remote_node
7633 self.delay_iallocator = delay_iallocator
7634 self.early_release = early_release
7637 self.instance = None
7638 self.new_node = None
7639 self.target_node = None
7640 self.other_node = None
7641 self.remote_node_info = None
7642 self.node_secondary_ip = None
7645 def CheckArguments(mode, remote_node, iallocator):
7646 """Helper function for users of this class.
7649 # check for valid parameter combination
7650 if mode == constants.REPLACE_DISK_CHG:
7651 if remote_node is None and iallocator is None:
7652 raise errors.OpPrereqError("When changing the secondary either an"
7653 " iallocator script must be used or the"
7654 " new node given", errors.ECODE_INVAL)
7656 if remote_node is not None and iallocator is not None:
7657 raise errors.OpPrereqError("Give either the iallocator or the new"
7658 " secondary, not both", errors.ECODE_INVAL)
7660 elif remote_node is not None or iallocator is not None:
7661 # Not replacing the secondary
7662 raise errors.OpPrereqError("The iallocator and new node options can"
7663 " only be used when changing the"
7664 " secondary node", errors.ECODE_INVAL)
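# A brief, illustrative summary of what CheckArguments accepts (the node and
# iallocator script names below are hypothetical, not defined in this module):
#   mode=REPLACE_DISK_PRI/SEC/AUTO with remote_node=None, iallocator=None -> ok
#   mode=REPLACE_DISK_CHG with only remote_node="node3.example.com"       -> ok
#   mode=REPLACE_DISK_CHG with only iallocator="hail"                     -> ok
#   mode=REPLACE_DISK_CHG with both or neither of them                    -> rejected
#   any other mode combined with remote_node or iallocator                -> rejected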
7667 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7668 """Compute a new secondary node using an IAllocator.
7671 ial = IAllocator(lu.cfg, lu.rpc,
7672 mode=constants.IALLOCATOR_MODE_RELOC,
7674 relocate_from=relocate_from)
7676 ial.Run(iallocator_name)
7679 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7680 " %s" % (iallocator_name, ial.info),
7683 if len(ial.result) != ial.required_nodes:
7684 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7685 " of nodes (%s), required %s" %
7687 len(ial.result), ial.required_nodes),
7690 remote_node_name = ial.result[0]
7692 lu.LogInfo("Selected new secondary for instance '%s': %s",
7693 instance_name, remote_node_name)
7695 return remote_node_name
7697 def _FindFaultyDisks(self, node_name):
7698 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7701 def CheckPrereq(self):
7702 """Check prerequisites.
7704 This checks that the instance is in the cluster.
7707 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7708 assert instance is not None, \
7709 "Cannot retrieve locked instance %s" % self.instance_name
7711 if instance.disk_template != constants.DT_DRBD8:
7712 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7713 " instances", errors.ECODE_INVAL)
7715 if len(instance.secondary_nodes) != 1:
7716 raise errors.OpPrereqError("The instance has a strange layout,"
7717 " expected one secondary but found %d" %
7718 len(instance.secondary_nodes),
7721 if not self.delay_iallocator:
7722 self._CheckPrereq2()
7724 def _CheckPrereq2(self):
7725 """Check prerequisites, second part.
7727 This function should always be part of CheckPrereq. It was separated and is
7728 now called from Exec because during node evacuation iallocator was only
7729 called with an unmodified cluster model, not taking planned changes into account.
7733 instance = self.instance
7734 secondary_node = instance.secondary_nodes[0]
7736 if self.iallocator_name is None:
7737 remote_node = self.remote_node
7739 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7740 instance.name, instance.secondary_nodes)
7742 if remote_node is not None:
7743 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7744 assert self.remote_node_info is not None, \
7745 "Cannot retrieve locked node %s" % remote_node
7747 self.remote_node_info = None
7749 if remote_node == self.instance.primary_node:
7750 raise errors.OpPrereqError("The specified node is the primary node of"
7751 " the instance.", errors.ECODE_INVAL)
7753 if remote_node == secondary_node:
7754 raise errors.OpPrereqError("The specified node is already the"
7755 " secondary node of the instance.",
7758 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7759 constants.REPLACE_DISK_CHG):
7760 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7763 if self.mode == constants.REPLACE_DISK_AUTO:
7764 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7765 faulty_secondary = self._FindFaultyDisks(secondary_node)
7767 if faulty_primary and faulty_secondary:
7768 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7769 " one node and can not be repaired"
7770 " automatically" % self.instance_name,
7774 self.disks = faulty_primary
7775 self.target_node = instance.primary_node
7776 self.other_node = secondary_node
7777 check_nodes = [self.target_node, self.other_node]
7778 elif faulty_secondary:
7779 self.disks = faulty_secondary
7780 self.target_node = secondary_node
7781 self.other_node = instance.primary_node
7782 check_nodes = [self.target_node, self.other_node]
7788 # Non-automatic modes
7789 if self.mode == constants.REPLACE_DISK_PRI:
7790 self.target_node = instance.primary_node
7791 self.other_node = secondary_node
7792 check_nodes = [self.target_node, self.other_node]
7794 elif self.mode == constants.REPLACE_DISK_SEC:
7795 self.target_node = secondary_node
7796 self.other_node = instance.primary_node
7797 check_nodes = [self.target_node, self.other_node]
7799 elif self.mode == constants.REPLACE_DISK_CHG:
7800 self.new_node = remote_node
7801 self.other_node = instance.primary_node
7802 self.target_node = secondary_node
7803 check_nodes = [self.new_node, self.other_node]
7805 _CheckNodeNotDrained(self.lu, remote_node)
7807 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7808 assert old_node_info is not None
7809 if old_node_info.offline and not self.early_release:
7810 # doesn't make sense to delay the release
7811 self.early_release = True
7812 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7813 " early-release mode", secondary_node)
7816 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7819 # If not specified all disks should be replaced
7821 self.disks = range(len(self.instance.disks))
7823 for node in check_nodes:
7824 _CheckNodeOnline(self.lu, node)
7826 # Check whether disks are valid
7827 for disk_idx in self.disks:
7828 instance.FindDisk(disk_idx)
7830 # Get secondary node IP addresses
7833 for node_name in [self.target_node, self.other_node, self.new_node]:
7834 if node_name is not None:
7835 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7837 self.node_secondary_ip = node_2nd_ip
7839 def Exec(self, feedback_fn):
7840 """Execute disk replacement.
7842 This dispatches the disk replacement to the appropriate handler.
7845 if self.delay_iallocator:
7846 self._CheckPrereq2()
7849 feedback_fn("No disks need replacement")
7852 feedback_fn("Replacing disk(s) %s for %s" %
7853 (utils.CommaJoin(self.disks), self.instance.name))
7855 activate_disks = (not self.instance.admin_up)
7857 # Activate the instance disks if we're replacing them on a down instance
7859 _StartInstanceDisks(self.lu, self.instance, True)
7862 # Should we replace the secondary node?
7863 if self.new_node is not None:
7864 fn = self._ExecDrbd8Secondary
7866 fn = self._ExecDrbd8DiskOnly
7868 return fn(feedback_fn)
7871 # Deactivate the instance disks if we're replacing them on a
7874 _SafeShutdownInstanceDisks(self.lu, self.instance)
7876 def _CheckVolumeGroup(self, nodes):
7877 self.lu.LogInfo("Checking volume groups")
7879 vgname = self.cfg.GetVGName()
7881 # Make sure volume group exists on all involved nodes
7882 results = self.rpc.call_vg_list(nodes)
7884 raise errors.OpExecError("Can't list volume groups on the nodes")
7888 res.Raise("Error checking node %s" % node)
7889 if vgname not in res.payload:
7890 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7893 def _CheckDisksExistence(self, nodes):
7894 # Check disk existence
7895 for idx, dev in enumerate(self.instance.disks):
7896 if idx not in self.disks:
7900 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7901 self.cfg.SetDiskID(dev, node)
7903 result = self.rpc.call_blockdev_find(node, dev)
7905 msg = result.fail_msg
7906 if msg or not result.payload:
7908 msg = "disk not found"
7909 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7912 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7913 for idx, dev in enumerate(self.instance.disks):
7914 if idx not in self.disks:
7917 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7920 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7922 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7923 " replace disks for instance %s" %
7924 (node_name, self.instance.name))
7926 def _CreateNewStorage(self, node_name):
7927 vgname = self.cfg.GetVGName()
7930 for idx, dev in enumerate(self.instance.disks):
7931 if idx not in self.disks:
7934 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7936 self.cfg.SetDiskID(dev, node_name)
7938 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7939 names = _GenerateUniqueNames(self.lu, lv_names)
7941 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7942 logical_id=(vgname, names[0]))
7943 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7944 logical_id=(vgname, names[1]))
7946 new_lvs = [lv_data, lv_meta]
7947 old_lvs = dev.children
7948 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7950 # we pass force_create=True to force the LVM creation
7951 for new_lv in new_lvs:
7952 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7953 _GetInstanceInfoText(self.instance), False)
7957 def _CheckDevices(self, node_name, iv_names):
7958 for name, (dev, _, _) in iv_names.iteritems():
7959 self.cfg.SetDiskID(dev, node_name)
7961 result = self.rpc.call_blockdev_find(node_name, dev)
7963 msg = result.fail_msg
7964 if msg or not result.payload:
7966 msg = "disk not found"
7967 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7970 if result.payload.is_degraded:
7971 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7973 def _RemoveOldStorage(self, node_name, iv_names):
7974 for name, (_, old_lvs, _) in iv_names.iteritems():
7975 self.lu.LogInfo("Remove logical volumes for %s" % name)
7978 self.cfg.SetDiskID(lv, node_name)
7980 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7982 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7983 hint="remove unused LVs manually")
7985 def _ReleaseNodeLock(self, node_name):
7986 """Releases the lock for a given node."""
7987 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7989 def _ExecDrbd8DiskOnly(self, feedback_fn):
7990 """Replace a disk on the primary or secondary for DRBD 8.
7992 The algorithm for replace is quite complicated:
7994 1. for each disk to be replaced:
7996 1. create new LVs on the target node with unique names
7997 1. detach old LVs from the drbd device
7998 1. rename old LVs to name_replaced.<time_t>
7999 1. rename new LVs to old LVs
8000 1. attach the new LVs (with the old names now) to the drbd device
8002 1. wait for sync across all devices
8004 1. for each modified disk:
8006 1. remove old LVs (which have the name name_replaced.<time_t>)
8008 Failures are not very well handled.
8013 # Step: check device activation
8014 self.lu.LogStep(1, steps_total, "Check device existence")
8015 self._CheckDisksExistence([self.other_node, self.target_node])
8016 self._CheckVolumeGroup([self.target_node, self.other_node])
8018 # Step: check other node consistency
8019 self.lu.LogStep(2, steps_total, "Check peer consistency")
8020 self._CheckDisksConsistency(self.other_node,
8021 self.other_node == self.instance.primary_node,
8024 # Step: create new storage
8025 self.lu.LogStep(3, steps_total, "Allocate new storage")
8026 iv_names = self._CreateNewStorage(self.target_node)
8028 # Step: for each lv, detach+rename*2+attach
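# Sketch of the per-disk sequence implemented below (LV names are illustrative):
#   1. detach the old data/meta LVs from the DRBD device
#   2. rename the old LVs to "<name>_replaced-<time_t>"
#   3. rename the freshly created LVs to the old names
#   4. re-attach the renamed new LVs as children of the DRBD device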
8029 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8030 for dev, old_lvs, new_lvs in iv_names.itervalues():
8031 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8033 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8035 result.Raise("Can't detach drbd from local storage on node"
8036 " %s for device %s" % (self.target_node, dev.iv_name))
8038 #cfg.Update(instance)
8040 # ok, we created the new LVs, so now we know we have the needed
8041 # storage; as such, we proceed on the target node to rename
8042 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8043 # using the assumption that logical_id == physical_id (which in
8044 # turn is the unique_id on that node)
8046 # FIXME(iustin): use a better name for the replaced LVs
8047 temp_suffix = int(time.time())
8048 ren_fn = lambda d, suff: (d.physical_id[0],
8049 d.physical_id[1] + "_replaced-%s" % suff)
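# For a disk whose physical_id is, say, ("xenvg", "uuid.disk0_data") and a
# suffix of 1234567890, ren_fn yields ("xenvg", "uuid.disk0_data_replaced-1234567890");
# these names are purely illustrative.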
8051 # Build the rename list based on what LVs exist on the node
8052 rename_old_to_new = []
8053 for to_ren in old_lvs:
8054 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8055 if not result.fail_msg and result.payload:
8057 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8059 self.lu.LogInfo("Renaming the old LVs on the target node")
8060 result = self.rpc.call_blockdev_rename(self.target_node,
8062 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8064 # Now we rename the new LVs to the old LVs
8065 self.lu.LogInfo("Renaming the new LVs on the target node")
8066 rename_new_to_old = [(new, old.physical_id)
8067 for old, new in zip(old_lvs, new_lvs)]
8068 result = self.rpc.call_blockdev_rename(self.target_node,
8070 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8072 for old, new in zip(old_lvs, new_lvs):
8073 new.logical_id = old.logical_id
8074 self.cfg.SetDiskID(new, self.target_node)
8076 for disk in old_lvs:
8077 disk.logical_id = ren_fn(disk, temp_suffix)
8078 self.cfg.SetDiskID(disk, self.target_node)
8080 # Now that the new lvs have the old name, we can add them to the device
8081 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8082 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8084 msg = result.fail_msg
8086 for new_lv in new_lvs:
8087 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8090 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8091 hint=("cleanup manually the unused logical"
8093 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8095 dev.children = new_lvs
8097 self.cfg.Update(self.instance, feedback_fn)
8100 if self.early_release:
8101 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8103 self._RemoveOldStorage(self.target_node, iv_names)
8104 # WARNING: we release both node locks here, do not do other RPCs
8105 # than WaitForSync to the primary node
8106 self._ReleaseNodeLock([self.target_node, self.other_node])
8109 # This can fail as the old devices are degraded and _WaitForSync
8110 # does a combined result over all disks, so we don't check its return value
8111 self.lu.LogStep(cstep, steps_total, "Sync devices")
8113 _WaitForSync(self.lu, self.instance)
8115 # Check all devices manually
8116 self._CheckDevices(self.instance.primary_node, iv_names)
8118 # Step: remove old storage
8119 if not self.early_release:
8120 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8122 self._RemoveOldStorage(self.target_node, iv_names)
8124 def _ExecDrbd8Secondary(self, feedback_fn):
8125 """Replace the secondary node for DRBD 8.
8127 The algorithm for replace is quite complicated:
8128 - for all disks of the instance:
8129 - create new LVs on the new node with same names
8130 - shutdown the drbd device on the old secondary
8131 - disconnect the drbd network on the primary
8132 - create the drbd device on the new secondary
8133 - network attach the drbd on the primary, using an artifice:
8134 the drbd code for Attach() will connect to the network if it
8135 finds a device which is connected to the good local disks but
8137 - wait for sync across all devices
8138 - remove all disks from the old secondary
8140 Failures are not very well handled.
8145 # Step: check device activation
8146 self.lu.LogStep(1, steps_total, "Check device existence")
8147 self._CheckDisksExistence([self.instance.primary_node])
8148 self._CheckVolumeGroup([self.instance.primary_node])
8150 # Step: check other node consistency
8151 self.lu.LogStep(2, steps_total, "Check peer consistency")
8152 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8154 # Step: create new storage
8155 self.lu.LogStep(3, steps_total, "Allocate new storage")
8156 for idx, dev in enumerate(self.instance.disks):
8157 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8158 (self.new_node, idx))
8159 # we pass force_create=True to force LVM creation
8160 for new_lv in dev.children:
8161 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8162 _GetInstanceInfoText(self.instance), False)
8164 # Step 4: drbd minors and drbd setup changes
8165 # after this, we must manually remove the drbd minors on both the
8166 # error and the success paths
8167 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8168 minors = self.cfg.AllocateDRBDMinor([self.new_node
8169 for dev in self.instance.disks],
8171 logging.debug("Allocated minors %r", minors)
8174 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8175 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
8176 (self.new_node, idx))
8177 # create new devices on new_node; note that we create two IDs:
8178 # one without port, so the drbd will be activated without
8179 # networking information on the new node at this stage, and one
8180 # with network, for the later activation in step 4
8181 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8182 if self.instance.primary_node == o_node1:
8185 assert self.instance.primary_node == o_node2, "Three-node instance?"
8188 new_alone_id = (self.instance.primary_node, self.new_node, None,
8189 p_minor, new_minor, o_secret)
8190 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8191 p_minor, new_minor, o_secret)
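# Shape of the logical_id tuples built above, with hypothetical values:
#   new_alone_id = ("pnode", "newnode", None,  0, 3, "secret")  # no port yet
#   new_net_id   = ("pnode", "newnode", 11000, 0, 3, "secret")  # with the DRBD port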
8193 iv_names[idx] = (dev, dev.children, new_net_id)
8194 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8196 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8197 logical_id=new_alone_id,
8198 children=dev.children,
8201 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8202 _GetInstanceInfoText(self.instance), False)
8203 except errors.GenericError:
8204 self.cfg.ReleaseDRBDMinors(self.instance.name)
8207 # We have new devices, shutdown the drbd on the old secondary
8208 for idx, dev in enumerate(self.instance.disks):
8209 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8210 self.cfg.SetDiskID(dev, self.target_node)
8211 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8213 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8214 " node: %s" % (idx, msg),
8215 hint=("Please cleanup this device manually as"
8216 " soon as possible"))
8218 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8219 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8220 self.node_secondary_ip,
8221 self.instance.disks)\
8222 [self.instance.primary_node]
8224 msg = result.fail_msg
8226 # detaches didn't succeed (unlikely)
8227 self.cfg.ReleaseDRBDMinors(self.instance.name)
8228 raise errors.OpExecError("Can't detach the disks from the network on"
8229 " old node: %s" % (msg,))
8231 # if we managed to detach at least one, we update all the disks of
8232 # the instance to point to the new secondary
8233 self.lu.LogInfo("Updating instance configuration")
8234 for dev, _, new_logical_id in iv_names.itervalues():
8235 dev.logical_id = new_logical_id
8236 self.cfg.SetDiskID(dev, self.instance.primary_node)
8238 self.cfg.Update(self.instance, feedback_fn)
8240 # and now perform the drbd attach
8241 self.lu.LogInfo("Attaching primary drbds to new secondary"
8242 " (standalone => connected)")
8243 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8245 self.node_secondary_ip,
8246 self.instance.disks,
8249 for to_node, to_result in result.items():
8250 msg = to_result.fail_msg
8252 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8254 hint=("please do a gnt-instance info to see the"
8255 " status of disks"))
8257 if self.early_release:
8258 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8260 self._RemoveOldStorage(self.target_node, iv_names)
8261 # WARNING: we release all node locks here, do not do other RPCs
8262 # than WaitForSync to the primary node
8263 self._ReleaseNodeLock([self.instance.primary_node,
8268 # This can fail as the old devices are degraded and _WaitForSync
8269 # does a combined result over all disks, so we don't check its return value
8270 self.lu.LogStep(cstep, steps_total, "Sync devices")
8272 _WaitForSync(self.lu, self.instance)
8274 # Check all devices manually
8275 self._CheckDevices(self.instance.primary_node, iv_names)
8277 # Step: remove old storage
8278 if not self.early_release:
8279 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8280 self._RemoveOldStorage(self.target_node, iv_names)
8283 class LURepairNodeStorage(NoHooksLU):
8284 """Repairs the volume group on a node.
8289 ("storage_type", _NoDefault, _CheckStorageType),
8290 ("name", _NoDefault, _TNonEmptyString),
8291 ("ignore_consistency", False, _TBool),
8295 def CheckArguments(self):
8296 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8298 storage_type = self.op.storage_type
8300 if (constants.SO_FIX_CONSISTENCY not in
8301 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8302 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8303 " repaired" % storage_type,
8306 def ExpandNames(self):
8307 self.needed_locks = {
8308 locking.LEVEL_NODE: [self.op.node_name],
8311 def _CheckFaultyDisks(self, instance, node_name):
8312 """Ensure faulty disks abort the opcode or at least warn."""
8314 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8316 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8317 " node '%s'" % (instance.name, node_name),
8319 except errors.OpPrereqError, err:
8320 if self.op.ignore_consistency:
8321 self.proc.LogWarning(str(err.args[0]))
8325 def CheckPrereq(self):
8326 """Check prerequisites.
8329 # Check whether any instance on this node has faulty disks
8330 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8331 if not inst.admin_up:
8333 check_nodes = set(inst.all_nodes)
8334 check_nodes.discard(self.op.node_name)
8335 for inst_node_name in check_nodes:
8336 self._CheckFaultyDisks(inst, inst_node_name)
8338 def Exec(self, feedback_fn):
8339 feedback_fn("Repairing storage unit '%s' on %s ..." %
8340 (self.op.name, self.op.node_name))
8342 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8343 result = self.rpc.call_storage_execute(self.op.node_name,
8344 self.op.storage_type, st_args,
8346 constants.SO_FIX_CONSISTENCY)
8347 result.Raise("Failed to repair storage unit '%s' on %s" %
8348 (self.op.name, self.op.node_name))
8351 class LUNodeEvacuationStrategy(NoHooksLU):
8352 """Computes the node evacuation strategy.
8356 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8357 ("remote_node", None, _TMaybeString),
8358 ("iallocator", None, _TMaybeString),
8362 def CheckArguments(self):
8363 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8365 def ExpandNames(self):
8366 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8367 self.needed_locks = locks = {}
8368 if self.op.remote_node is None:
8369 locks[locking.LEVEL_NODE] = locking.ALL_SET
8371 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8372 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8374 def Exec(self, feedback_fn):
8375 if self.op.remote_node is not None:
8377 for node in self.op.nodes:
8378 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8381 if i.primary_node == self.op.remote_node:
8382 raise errors.OpPrereqError("Node %s is the primary node of"
8383 " instance %s, cannot use it as"
8385 (self.op.remote_node, i.name),
8387 result.append([i.name, self.op.remote_node])
8389 ial = IAllocator(self.cfg, self.rpc,
8390 mode=constants.IALLOCATOR_MODE_MEVAC,
8391 evac_nodes=self.op.nodes)
8392 ial.Run(self.op.iallocator, validate=True)
8394 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8400 class LUGrowDisk(LogicalUnit):
8401 """Grow a disk of an instance.
8405 HTYPE = constants.HTYPE_INSTANCE
8408 ("disk", _NoDefault, _TInt),
8409 ("amount", _NoDefault, _TInt),
8410 ("wait_for_sync", True, _TBool),
8414 def ExpandNames(self):
8415 self._ExpandAndLockInstance()
8416 self.needed_locks[locking.LEVEL_NODE] = []
8417 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8419 def DeclareLocks(self, level):
8420 if level == locking.LEVEL_NODE:
8421 self._LockInstancesNodes()
8423 def BuildHooksEnv(self):
8426 This runs on the master, the primary and all the secondaries.
8430 "DISK": self.op.disk,
8431 "AMOUNT": self.op.amount,
8433 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8434 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8437 def CheckPrereq(self):
8438 """Check prerequisites.
8440 This checks that the instance is in the cluster.
8443 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8444 assert instance is not None, \
8445 "Cannot retrieve locked instance %s" % self.op.instance_name
8446 nodenames = list(instance.all_nodes)
8447 for node in nodenames:
8448 _CheckNodeOnline(self, node)
8450 self.instance = instance
8452 if instance.disk_template not in constants.DTS_GROWABLE:
8453 raise errors.OpPrereqError("Instance's disk layout does not support"
8454 " growing.", errors.ECODE_INVAL)
8456 self.disk = instance.FindDisk(self.op.disk)
8458 if instance.disk_template != constants.DT_FILE:
8459 # TODO: check the free disk space for file, when that feature will be
8461 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8463 def Exec(self, feedback_fn):
8464 """Execute disk grow.
8467 instance = self.instance
8470 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8472 raise errors.OpExecError("Cannot activate block device to grow")
8474 for node in instance.all_nodes:
8475 self.cfg.SetDiskID(disk, node)
8476 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8477 result.Raise("Grow request failed on node %s" % node)
8479 # TODO: Rewrite code to work properly
8480 # DRBD goes into sync mode for a short amount of time after executing the
8481 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8482 # calling "resize" in sync mode fails. Sleeping for a short amount of
8483 # time is a work-around.
8486 disk.RecordGrow(self.op.amount)
8487 self.cfg.Update(instance, feedback_fn)
8488 if self.op.wait_for_sync:
8489 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8491 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8492 " status.\nPlease check the instance.")
8493 if not instance.admin_up:
8494 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8495 elif not instance.admin_up:
8496 self.proc.LogWarning("Not shutting down the disk even though the instance"
8497 " is not supposed to be running, because wait for"
8498 " sync was not requested.")
8501 class LUQueryInstanceData(NoHooksLU):
8502 """Query runtime instance data.
8506 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8507 ("static", False, _TBool),
8511 def ExpandNames(self):
8512 self.needed_locks = {}
8513 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8515 if self.op.instances:
8516 self.wanted_names = []
8517 for name in self.op.instances:
8518 full_name = _ExpandInstanceName(self.cfg, name)
8519 self.wanted_names.append(full_name)
8520 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8522 self.wanted_names = None
8523 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8525 self.needed_locks[locking.LEVEL_NODE] = []
8526 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8528 def DeclareLocks(self, level):
8529 if level == locking.LEVEL_NODE:
8530 self._LockInstancesNodes()
8532 def CheckPrereq(self):
8533 """Check prerequisites.
8535 This only checks the optional instance list against the existing names.
8538 if self.wanted_names is None:
8539 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8541 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8542 in self.wanted_names]
8544 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8545 """Returns the status of a block device
8548 if self.op.static or not node:
8551 self.cfg.SetDiskID(dev, node)
8553 result = self.rpc.call_blockdev_find(node, dev)
8557 result.Raise("Can't compute disk status for %s" % instance_name)
8559 status = result.payload
8563 return (status.dev_path, status.major, status.minor,
8564 status.sync_percent, status.estimated_time,
8565 status.is_degraded, status.ldisk_status)
8567 def _ComputeDiskStatus(self, instance, snode, dev):
8568 """Compute block device status.
8571 if dev.dev_type in constants.LDS_DRBD:
8572 # we change the snode then (otherwise we use the one passed in)
8573 if dev.logical_id[0] == instance.primary_node:
8574 snode = dev.logical_id[1]
8576 snode = dev.logical_id[0]
8578 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8580 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8583 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8584 for child in dev.children]
8589 "iv_name": dev.iv_name,
8590 "dev_type": dev.dev_type,
8591 "logical_id": dev.logical_id,
8592 "physical_id": dev.physical_id,
8593 "pstatus": dev_pstatus,
8594 "sstatus": dev_sstatus,
8595 "children": dev_children,
8602 def Exec(self, feedback_fn):
8603 """Gather and return data"""
8606 cluster = self.cfg.GetClusterInfo()
8608 for instance in self.wanted_instances:
8609 if not self.op.static:
8610 remote_info = self.rpc.call_instance_info(instance.primary_node,
8612 instance.hypervisor)
8613 remote_info.Raise("Error checking node %s" % instance.primary_node)
8614 remote_info = remote_info.payload
8615 if remote_info and "state" in remote_info:
8618 remote_state = "down"
8621 if instance.admin_up:
8624 config_state = "down"
8626 disks = [self._ComputeDiskStatus(instance, None, device)
8627 for device in instance.disks]
8630 "name": instance.name,
8631 "config_state": config_state,
8632 "run_state": remote_state,
8633 "pnode": instance.primary_node,
8634 "snodes": instance.secondary_nodes,
8636 # this happens to be the same format used for hooks
8637 "nics": _NICListToTuple(self, instance.nics),
8638 "disk_template": instance.disk_template,
8640 "hypervisor": instance.hypervisor,
8641 "network_port": instance.network_port,
8642 "hv_instance": instance.hvparams,
8643 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8644 "be_instance": instance.beparams,
8645 "be_actual": cluster.FillBE(instance),
8646 "os_instance": instance.osparams,
8647 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8648 "serial_no": instance.serial_no,
8649 "mtime": instance.mtime,
8650 "ctime": instance.ctime,
8651 "uuid": instance.uuid,
8654 result[instance.name] = idict
8659 class LUSetInstanceParams(LogicalUnit):
8660 """Modifies an instance's parameters.
8663 HPATH = "instance-modify"
8664 HTYPE = constants.HTYPE_INSTANCE
8667 ("nics", _EmptyList, _TList),
8668 ("disks", _EmptyList, _TList),
8669 ("beparams", _EmptyDict, _TDict),
8670 ("hvparams", _EmptyDict, _TDict),
8671 ("disk_template", None, _TMaybeString),
8672 ("remote_node", None, _TMaybeString),
8673 ("os_name", None, _TMaybeString),
8674 ("force_variant", False, _TBool),
8675 ("osparams", None, _TOr(_TDict, _TNone)),
8680 def CheckArguments(self):
8681 if not (self.op.nics or self.op.disks or self.op.disk_template or
8682 self.op.hvparams or self.op.beparams or self.op.os_name):
8683 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8685 if self.op.hvparams:
8686 _CheckGlobalHvParams(self.op.hvparams)
8690 for disk_op, disk_dict in self.op.disks:
8691 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8692 if disk_op == constants.DDM_REMOVE:
8695 elif disk_op == constants.DDM_ADD:
8698 if not isinstance(disk_op, int):
8699 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8700 if not isinstance(disk_dict, dict):
8701 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8702 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8704 if disk_op == constants.DDM_ADD:
8705 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8706 if mode not in constants.DISK_ACCESS_SET:
8707 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8709 size = disk_dict.get('size', None)
8711 raise errors.OpPrereqError("Required disk parameter size missing",
8715 except (TypeError, ValueError), err:
8716 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8717 str(err), errors.ECODE_INVAL)
8718 disk_dict['size'] = size
8720 # modification of disk
8721 if 'size' in disk_dict:
8722 raise errors.OpPrereqError("Disk size change not possible, use"
8723 " grow-disk", errors.ECODE_INVAL)
8725 if disk_addremove > 1:
8726 raise errors.OpPrereqError("Only one disk add or remove operation"
8727 " supported at a time", errors.ECODE_INVAL)
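# Illustrative self.op.disks values (sizes in MiB, all values hypothetical):
#   [(constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})]  # add a 1 GiB disk
#   [(constants.DDM_REMOVE, {})]                                        # drop the last disk
#   [(0, {"mode": constants.DISK_RDONLY})]                              # change mode of disk 0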
8729 if self.op.disks and self.op.disk_template is not None:
8730 raise errors.OpPrereqError("Disk template conversion and other disk"
8731 " changes not supported at the same time",
8734 if self.op.disk_template:
8735 _CheckDiskTemplate(self.op.disk_template)
8736 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8737 self.op.remote_node is None):
8738 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8739 " one requires specifying a secondary node",
8744 for nic_op, nic_dict in self.op.nics:
8745 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8746 if nic_op == constants.DDM_REMOVE:
8749 elif nic_op == constants.DDM_ADD:
8752 if not isinstance(nic_op, int):
8753 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8754 if not isinstance(nic_dict, dict):
8755 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8756 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8758 # nic_dict should be a dict
8759 nic_ip = nic_dict.get('ip', None)
8760 if nic_ip is not None:
8761 if nic_ip.lower() == constants.VALUE_NONE:
8762 nic_dict['ip'] = None
8764 if not netutils.IsValidIP4(nic_ip):
8765 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8768 nic_bridge = nic_dict.get('bridge', None)
8769 nic_link = nic_dict.get('link', None)
8770 if nic_bridge and nic_link:
8771 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8772 " at the same time", errors.ECODE_INVAL)
8773 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8774 nic_dict['bridge'] = None
8775 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8776 nic_dict['link'] = None
8778 if nic_op == constants.DDM_ADD:
8779 nic_mac = nic_dict.get('mac', None)
8781 nic_dict['mac'] = constants.VALUE_AUTO
8783 if 'mac' in nic_dict:
8784 nic_mac = nic_dict['mac']
8785 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8786 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8788 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8789 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8790 " modifying an existing nic",
8793 if nic_addremove > 1:
8794 raise errors.OpPrereqError("Only one NIC add or remove operation"
8795 " supported at a time", errors.ECODE_INVAL)
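# Illustrative self.op.nics values (addresses and link names are examples only):
#   [(constants.DDM_ADD, {"mac": constants.VALUE_AUTO, "link": "br0"})]  # add a NIC
#   [(0, {"ip": "192.0.2.10"})]                                          # change the IP of NIC 0
#   [(constants.DDM_REMOVE, {})]                                         # remove the last NIC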
8797 def ExpandNames(self):
8798 self._ExpandAndLockInstance()
8799 self.needed_locks[locking.LEVEL_NODE] = []
8800 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8802 def DeclareLocks(self, level):
8803 if level == locking.LEVEL_NODE:
8804 self._LockInstancesNodes()
8805 if self.op.disk_template and self.op.remote_node:
8806 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8807 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8809 def BuildHooksEnv(self):
8812 This runs on the master, primary and secondaries.
8816 if constants.BE_MEMORY in self.be_new:
8817 args['memory'] = self.be_new[constants.BE_MEMORY]
8818 if constants.BE_VCPUS in self.be_new:
8819 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8820 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8821 # information at all.
8824 nic_override = dict(self.op.nics)
8825 for idx, nic in enumerate(self.instance.nics):
8826 if idx in nic_override:
8827 this_nic_override = nic_override[idx]
8829 this_nic_override = {}
8830 if 'ip' in this_nic_override:
8831 ip = this_nic_override['ip']
8834 if 'mac' in this_nic_override:
8835 mac = this_nic_override['mac']
8838 if idx in self.nic_pnew:
8839 nicparams = self.nic_pnew[idx]
8841 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8842 mode = nicparams[constants.NIC_MODE]
8843 link = nicparams[constants.NIC_LINK]
8844 args['nics'].append((ip, mac, mode, link))
8845 if constants.DDM_ADD in nic_override:
8846 ip = nic_override[constants.DDM_ADD].get('ip', None)
8847 mac = nic_override[constants.DDM_ADD]['mac']
8848 nicparams = self.nic_pnew[constants.DDM_ADD]
8849 mode = nicparams[constants.NIC_MODE]
8850 link = nicparams[constants.NIC_LINK]
8851 args['nics'].append((ip, mac, mode, link))
8852 elif constants.DDM_REMOVE in nic_override:
8853 del args['nics'][-1]
8855 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8856 if self.op.disk_template:
8857 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8858 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8861 def CheckPrereq(self):
8862 """Check prerequisites.
8864 This only checks the instance list against the existing names.
8867 # checking the new params on the primary/secondary nodes
8869 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8870 cluster = self.cluster = self.cfg.GetClusterInfo()
8871 assert self.instance is not None, \
8872 "Cannot retrieve locked instance %s" % self.op.instance_name
8873 pnode = instance.primary_node
8874 nodelist = list(instance.all_nodes)
8877 if self.op.os_name and not self.op.force:
8878 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8879 self.op.force_variant)
8880 instance_os = self.op.os_name
8882 instance_os = instance.os
8884 if self.op.disk_template:
8885 if instance.disk_template == self.op.disk_template:
8886 raise errors.OpPrereqError("Instance already has disk template %s" %
8887 instance.disk_template, errors.ECODE_INVAL)
8889 if (instance.disk_template,
8890 self.op.disk_template) not in self._DISK_CONVERSIONS:
8891 raise errors.OpPrereqError("Unsupported disk template conversion from"
8892 " %s to %s" % (instance.disk_template,
8893 self.op.disk_template),
8895 _CheckInstanceDown(self, instance, "cannot change disk template")
8896 if self.op.disk_template in constants.DTS_NET_MIRROR:
8897 if self.op.remote_node == pnode:
8898 raise errors.OpPrereqError("Given new secondary node %s is the same"
8899 " as the primary node of the instance" %
8900 self.op.remote_node, errors.ECODE_STATE)
8901 _CheckNodeOnline(self, self.op.remote_node)
8902 _CheckNodeNotDrained(self, self.op.remote_node)
8903 disks = [{"size": d.size} for d in instance.disks]
8904 required = _ComputeDiskSize(self.op.disk_template, disks)
8905 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8907 # hvparams processing
8908 if self.op.hvparams:
8909 hv_type = instance.hypervisor
8910 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8911 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8912 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8915 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8916 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8917 self.hv_new = hv_new # the new actual values
8918 self.hv_inst = i_hvdict # the new dict (without defaults)
8920 self.hv_new = self.hv_inst = {}
8922 # beparams processing
8923 if self.op.beparams:
8924 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8926 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8927 be_new = cluster.SimpleFillBE(i_bedict)
8928 self.be_new = be_new # the new actual values
8929 self.be_inst = i_bedict # the new dict (without defaults)
8931 self.be_new = self.be_inst = {}
8933 # osparams processing
8934 if self.op.osparams:
8935 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8936 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8937 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8938 self.os_inst = i_osdict # the new dict (without defaults)
8940 self.os_new = self.os_inst = {}
8944 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8945 mem_check_list = [pnode]
8946 if be_new[constants.BE_AUTO_BALANCE]:
8947 # either we changed auto_balance to yes or it was from before
8948 mem_check_list.extend(instance.secondary_nodes)
8949 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8950 instance.hypervisor)
8951 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8952 instance.hypervisor)
8953 pninfo = nodeinfo[pnode]
8954 msg = pninfo.fail_msg
8956 # Assume the primary node is unreachable and go ahead
8957 self.warn.append("Can't get info from primary node %s: %s" %
8959 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8960 self.warn.append("Node data from primary node %s doesn't contain"
8961 " free memory information" % pnode)
8962 elif instance_info.fail_msg:
8963 self.warn.append("Can't get instance runtime information: %s" %
8964 instance_info.fail_msg)
8966 if instance_info.payload:
8967 current_mem = int(instance_info.payload['memory'])
8969 # Assume instance not running
8970 # (there is a slight race condition here, but it's not very probable,
8971 # and we have no other way to check)
8973 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8974 pninfo.payload['memory_free'])
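# Worked example with made-up numbers: raising BE_MEMORY to 2048 MB while the
# instance currently uses 512 MB and the primary node reports 1024 MB free
# gives miss_mem = 2048 - 512 - 1024 = 512 > 0, so the change is refused.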
8976 raise errors.OpPrereqError("This change will prevent the instance"
8977 " from starting, due to %d MB of memory"
8978 " missing on its primary node" % miss_mem,
8981 if be_new[constants.BE_AUTO_BALANCE]:
8982 for node, nres in nodeinfo.items():
8983 if node not in instance.secondary_nodes:
8987 self.warn.append("Can't get info from secondary node %s: %s" %
8989 elif not isinstance(nres.payload.get('memory_free', None), int):
8990 self.warn.append("Secondary node %s didn't return free"
8991 " memory information" % node)
8992 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8993 self.warn.append("Not enough memory to failover instance to"
8994 " secondary node %s" % node)
8999 for nic_op, nic_dict in self.op.nics:
9000 if nic_op == constants.DDM_REMOVE:
9001 if not instance.nics:
9002 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9005 if nic_op != constants.DDM_ADD:
9007 if not instance.nics:
9008 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9009 " no NICs" % nic_op,
9011 if nic_op < 0 or nic_op >= len(instance.nics):
9012 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9014 (nic_op, len(instance.nics) - 1),
9016 old_nic_params = instance.nics[nic_op].nicparams
9017 old_nic_ip = instance.nics[nic_op].ip
9022 update_params_dict = dict([(key, nic_dict[key])
9023 for key in constants.NICS_PARAMETERS
9024 if key in nic_dict])
9026 if 'bridge' in nic_dict:
9027 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9029 new_nic_params = _GetUpdatedParams(old_nic_params,
9031 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9032 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9033 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9034 self.nic_pinst[nic_op] = new_nic_params
9035 self.nic_pnew[nic_op] = new_filled_nic_params
9036 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9038 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9039 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9040 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9042 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9044 self.warn.append(msg)
9046 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9047 if new_nic_mode == constants.NIC_MODE_ROUTED:
9048 if 'ip' in nic_dict:
9049 nic_ip = nic_dict['ip']
9053 raise errors.OpPrereqError('Cannot set the nic ip to None'
9054 ' on a routed nic', errors.ECODE_INVAL)
9055 if 'mac' in nic_dict:
9056 nic_mac = nic_dict['mac']
9058 raise errors.OpPrereqError('Cannot set the nic mac to None',
9060 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9061 # otherwise generate the mac
9062 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9064 # or validate/reserve the current one
9066 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9067 except errors.ReservationError:
9068 raise errors.OpPrereqError("MAC address %s already in use"
9069 " in cluster" % nic_mac,
9070 errors.ECODE_NOTUNIQUE)
9073 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9074 raise errors.OpPrereqError("Disk operations not supported for"
9075 " diskless instances",
9077 for disk_op, _ in self.op.disks:
9078 if disk_op == constants.DDM_REMOVE:
9079 if len(instance.disks) == 1:
9080 raise errors.OpPrereqError("Cannot remove the last disk of"
9081 " an instance", errors.ECODE_INVAL)
9082 _CheckInstanceDown(self, instance, "cannot remove disks")
9084 if (disk_op == constants.DDM_ADD and
9085 len(instance.disks) >= constants.MAX_DISKS):
9086 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9087 " add more" % constants.MAX_DISKS,
9089 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9091 if disk_op < 0 or disk_op >= len(instance.disks):
9092 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9094 (disk_op, len(instance.disks)),
9099 def _ConvertPlainToDrbd(self, feedback_fn):
9100 """Converts an instance from plain to drbd.
9103 feedback_fn("Converting template to drbd")
9104 instance = self.instance
9105 pnode = instance.primary_node
9106 snode = self.op.remote_node
9108 # create a fake disk info for _GenerateDiskTemplate
9109 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9110 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9111 instance.name, pnode, [snode],
9112 disk_info, None, None, 0)
9113 info = _GetInstanceInfoText(instance)
9114 feedback_fn("Creating additional volumes...")
9115 # first, create the missing data and meta devices
9116 for disk in new_disks:
9117 # unfortunately this is... not too nice
9118 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9120 for child in disk.children:
9121 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9122 # at this stage, all new LVs have been created, we can rename the
9124 feedback_fn("Renaming original volumes...")
9125 rename_list = [(o, n.children[0].logical_id)
9126 for (o, n) in zip(instance.disks, new_disks)]
9127 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9128 result.Raise("Failed to rename original LVs")
9130 feedback_fn("Initializing DRBD devices...")
9131 # all child devices are in place, we can now create the DRBD devices
9132 for disk in new_disks:
9133 for node in [pnode, snode]:
9134 f_create = node == pnode
9135 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9137 # at this point, the instance has been modified
9138 instance.disk_template = constants.DT_DRBD8
9139 instance.disks = new_disks
9140 self.cfg.Update(instance, feedback_fn)
9142 # disks are created, waiting for sync
9143 disk_abort = not _WaitForSync(self, instance)
9145 raise errors.OpExecError("There are some degraded disks for"
9146 " this instance, please cleanup manually")
9148 def _ConvertDrbdToPlain(self, feedback_fn):
9149 """Converts an instance from drbd to plain.
9152 instance = self.instance
9153 assert len(instance.secondary_nodes) == 1
9154 pnode = instance.primary_node
9155 snode = instance.secondary_nodes[0]
9156 feedback_fn("Converting template to plain")
9158 old_disks = instance.disks
9159 new_disks = [d.children[0] for d in old_disks]
9161 # copy over size and mode
9162 for parent, child in zip(old_disks, new_disks):
9163 child.size = parent.size
9164 child.mode = parent.mode
9166 # update instance structure
9167 instance.disks = new_disks
9168 instance.disk_template = constants.DT_PLAIN
9169 self.cfg.Update(instance, feedback_fn)
9171 feedback_fn("Removing volumes on the secondary node...")
9172 for disk in old_disks:
9173 self.cfg.SetDiskID(disk, snode)
9174 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9176 self.LogWarning("Could not remove block device %s on node %s,"
9177 " continuing anyway: %s", disk.iv_name, snode, msg)
9179 feedback_fn("Removing unneeded volumes on the primary node...")
9180 for idx, disk in enumerate(old_disks):
9181 meta = disk.children[1]
9182 self.cfg.SetDiskID(meta, pnode)
9183 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9185 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9186 " continuing anyway: %s", idx, pnode, msg)
9189 def Exec(self, feedback_fn):
9190 """Modifies an instance.
9192 All parameters take effect only at the next restart of the instance.
9195 # Process here the warnings from CheckPrereq, as we don't have a
9196 # feedback_fn there.
9197 for warn in self.warn:
9198 feedback_fn("WARNING: %s" % warn)
9201 instance = self.instance
9203 for disk_op, disk_dict in self.op.disks:
9204 if disk_op == constants.DDM_REMOVE:
9205 # remove the last disk
9206 device = instance.disks.pop()
9207 device_idx = len(instance.disks)
9208 for node, disk in device.ComputeNodeTree(instance.primary_node):
9209 self.cfg.SetDiskID(disk, node)
9210 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9212 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9213 " continuing anyway", device_idx, node, msg)
9214 result.append(("disk/%d" % device_idx, "remove"))
9215 elif disk_op == constants.DDM_ADD:
9217 if instance.disk_template == constants.DT_FILE:
9218 file_driver, file_path = instance.disks[0].logical_id
9219 file_path = os.path.dirname(file_path)
9221 file_driver = file_path = None
9222 disk_idx_base = len(instance.disks)
9223 new_disk = _GenerateDiskTemplate(self,
9224 instance.disk_template,
9225 instance.name, instance.primary_node,
9226 instance.secondary_nodes,
9231 instance.disks.append(new_disk)
9232 info = _GetInstanceInfoText(instance)
9234 logging.info("Creating volume %s for instance %s",
9235 new_disk.iv_name, instance.name)
9236 # Note: this needs to be kept in sync with _CreateDisks
9238 for node in instance.all_nodes:
9239 f_create = node == instance.primary_node
9241 _CreateBlockDev(self, node, instance, new_disk,
9242 f_create, info, f_create)
9243 except errors.OpExecError, err:
9244 self.LogWarning("Failed to create volume %s (%s) on"
9246 new_disk.iv_name, new_disk, node, err)
9247 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9248 (new_disk.size, new_disk.mode)))
9250 # change a given disk
9251 instance.disks[disk_op].mode = disk_dict['mode']
9252 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9254 if self.op.disk_template:
9255 r_shut = _ShutdownInstanceDisks(self, instance)
9257 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9258 " proceed with disk template conversion")
9259 mode = (instance.disk_template, self.op.disk_template)
9261 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9263 self.cfg.ReleaseDRBDMinors(instance.name)
9265 result.append(("disk_template", self.op.disk_template))
9268 for nic_op, nic_dict in self.op.nics:
9269 if nic_op == constants.DDM_REMOVE:
9270 # remove the last nic
9271 del instance.nics[-1]
9272 result.append(("nic.%d" % len(instance.nics), "remove"))
9273 elif nic_op == constants.DDM_ADD:
9274 # mac and bridge should be set, by now
9275 mac = nic_dict['mac']
9276 ip = nic_dict.get('ip', None)
9277 nicparams = self.nic_pinst[constants.DDM_ADD]
9278 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9279 instance.nics.append(new_nic)
9280 result.append(("nic.%d" % (len(instance.nics) - 1),
9281 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9282 (new_nic.mac, new_nic.ip,
9283 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9284 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9287 for key in 'mac', 'ip':
9289 setattr(instance.nics[nic_op], key, nic_dict[key])
9290 if nic_op in self.nic_pinst:
9291 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9292 for key, val in nic_dict.iteritems():
9293 result.append(("nic.%s/%d" % (key, nic_op), val))
9296 if self.op.hvparams:
9297 instance.hvparams = self.hv_inst
9298 for key, val in self.op.hvparams.iteritems():
9299 result.append(("hv/%s" % key, val))
9302 if self.op.beparams:
9303 instance.beparams = self.be_inst
9304 for key, val in self.op.beparams.iteritems():
9305 result.append(("be/%s" % key, val))
9309 instance.os = self.op.os_name
9312 if self.op.osparams:
9313 instance.osparams = self.os_inst
9314 for key, val in self.op.osparams.iteritems():
9315 result.append(("os/%s" % key, val))
9317 self.cfg.Update(instance, feedback_fn)
9321 _DISK_CONVERSIONS = {
9322 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9323 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9327 class LUQueryExports(NoHooksLU):
9328 """Query the exports list
9332 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9333 ("use_locking", False, _TBool),
9337 def ExpandNames(self):
9338 self.needed_locks = {}
9339 self.share_locks[locking.LEVEL_NODE] = 1
9340 if not self.op.nodes:
9341 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9343 self.needed_locks[locking.LEVEL_NODE] = \
9344 _GetWantedNodes(self, self.op.nodes)
9346 def Exec(self, feedback_fn):
9347 """Compute the list of all the exported system images.
9350 @return: a dictionary with the structure node->(export-list)
9351 where export-list is a list of the instances exported on
9355 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9356 rpcresult = self.rpc.call_export_list(self.nodes)
9358 for node in rpcresult:
9359 if rpcresult[node].fail_msg:
9360 result[node] = False
9362 result[node] = rpcresult[node].payload
9367 class LUPrepareExport(NoHooksLU):
9368 """Prepares an instance for an export and returns useful information.
9373 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9377 def ExpandNames(self):
9378 self._ExpandAndLockInstance()
9380 def CheckPrereq(self):
9381 """Check prerequisites.
9384 instance_name = self.op.instance_name
9386 self.instance = self.cfg.GetInstanceInfo(instance_name)
9387 assert self.instance is not None, \
9388 "Cannot retrieve locked instance %s" % self.op.instance_name
9389 _CheckNodeOnline(self, self.instance.primary_node)
9391 self._cds = _GetClusterDomainSecret()
9393 def Exec(self, feedback_fn):
9394 """Prepares an instance for an export.
9397 instance = self.instance
9399 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9400 salt = utils.GenerateSecret(8)
9402 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9403 result = self.rpc.call_x509_cert_create(instance.primary_node,
9404 constants.RIE_CERT_VALIDITY)
9405 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9407 (name, cert_pem) = result.payload
9409 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9413 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9414 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9416 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9422 class LUExportInstance(LogicalUnit):
9423 """Export an instance to an image in the cluster.
9426 HPATH = "instance-export"
9427 HTYPE = constants.HTYPE_INSTANCE
9430 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9431 ("shutdown", True, _TBool),
9433 ("remove_instance", False, _TBool),
9434 ("ignore_remove_failures", False, _TBool),
9435 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9436 ("x509_key_name", None, _TOr(_TList, _TNone)),
9437 ("destination_x509_ca", None, _TMaybeString),
9441 def CheckArguments(self):
9442 """Check the arguments.
9445 self.x509_key_name = self.op.x509_key_name
9446 self.dest_x509_ca_pem = self.op.destination_x509_ca
9448 if self.op.remove_instance and not self.op.shutdown:
9449 raise errors.OpPrereqError("Can not remove instance without shutting it"
9452 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9453 if not self.x509_key_name:
9454 raise errors.OpPrereqError("Missing X509 key name for encryption",
9457 if not self.dest_x509_ca_pem:
9458 raise errors.OpPrereqError("Missing destination X509 CA",
9461 def ExpandNames(self):
9462 self._ExpandAndLockInstance()
9464 # Lock all nodes for local exports
9465 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9466 # FIXME: lock only instance primary and destination node
9468 # Sad but true, for now we have to lock all nodes, as we don't know where
9469 # the previous export might be, and in this LU we search for it and
9470 # remove it from its current node. In the future we could fix this by:
9471 # - making a tasklet to search (share-lock all), then create the
9472 # new one, then one to remove, after
9473 # - removing the removal operation altogether
9474 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9476 def DeclareLocks(self, level):
9477 """Last minute lock declaration."""
9478 # All nodes are locked anyway, so nothing to do here.
9480 def BuildHooksEnv(self):
9483 This will run on the master, primary node and target node.
9487 "EXPORT_MODE": self.op.mode,
9488 "EXPORT_NODE": self.op.target_node,
9489 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9490 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9491 # TODO: Generic function for boolean env variables
9492 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9495 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9497 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9499 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9500 nl.append(self.op.target_node)
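# Illustrative sketch (hypothetical values) of the variables assembled above
# for a local export; the instance-related entries added by
# _BuildInstanceHookEnvByObject are omitted:
#
#   EXPORT_MODE=local
#   EXPORT_NODE=node3.example.com
#   EXPORT_DO_SHUTDOWN=True
#   SHUTDOWN_TIMEOUT=120
#   REMOVE_INSTANCE=False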
9504 def CheckPrereq(self):
9505 """Check prerequisites.
9507 This checks that the instance and node names are valid.
9510 instance_name = self.op.instance_name
9512 self.instance = self.cfg.GetInstanceInfo(instance_name)
9513 assert self.instance is not None, \
9514 "Cannot retrieve locked instance %s" % self.op.instance_name
9515 _CheckNodeOnline(self, self.instance.primary_node)
9517 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9518 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9519 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9520 assert self.dst_node is not None
9522 _CheckNodeOnline(self, self.dst_node.name)
9523 _CheckNodeNotDrained(self, self.dst_node.name)
9526 self.dest_disk_info = None
9527 self.dest_x509_ca = None
9529 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9530 self.dst_node = None
9532 if len(self.op.target_node) != len(self.instance.disks):
9533 raise errors.OpPrereqError(("Received destination information for %s"
9534 " disks, but instance %s has %s disks") %
9535 (len(self.op.target_node), instance_name,
9536 len(self.instance.disks)),
9539 cds = _GetClusterDomainSecret()
9541 # Check X509 key name
9543 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9544 except (TypeError, ValueError), err:
9545 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
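# A minimal sketch of how the (key_name, hmac_digest, hmac_salt) triple
# unpacked above is expected to have been produced (hypothetical key name,
# mirroring what LUPrepareExport does with utils.Sha1Hmac):
#
#   hmac_salt = utils.GenerateSecret(8)
#   hmac_digest = utils.Sha1Hmac(cds, "x509-key-name", salt=hmac_salt)
#
# VerifySha1Hmac below recomputes the digest with the local cluster domain
# secret and rejects the export if it does not match.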
9547 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9548 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9551 # Load and verify CA
9553 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9554 except OpenSSL.crypto.Error, err:
9555 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9556 (err, ), errors.ECODE_INVAL)
9558 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9559 if errcode is not None:
9560 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9561 (msg, ), errors.ECODE_INVAL)
9563 self.dest_x509_ca = cert
9565 # Verify target information
9567 for idx, disk_data in enumerate(self.op.target_node):
9569 (host, port, magic) = \
9570 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9571 except errors.GenericError, err:
9572 raise errors.OpPrereqError("Target info for disk %s: %s" %
9573 (idx, err), errors.ECODE_INVAL)
9575 disk_info.append((host, port, magic))
9577 assert len(disk_info) == len(self.op.target_node)
9578 self.dest_disk_info = disk_info
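# Illustrative sketch (hypothetical values): after verification, each entry
# describes where the matching disk will be sent, e.g. for a two-disk
# instance
#
#   disk_info == [("192.0.2.10", 11000, "magic0"),
#                 ("192.0.2.10", 11001, "magic1")]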
9581 raise errors.ProgrammerError("Unhandled export mode %r" %
9584 # instance disk type verification
9585 # TODO: Implement export support for file-based disks
9586 for disk in self.instance.disks:
9587 if disk.dev_type == constants.LD_FILE:
9588 raise errors.OpPrereqError("Export not supported for instances with"
9589 " file-based disks", errors.ECODE_INVAL)
9591 def _CleanupExports(self, feedback_fn):
9592 """Removes exports of current instance from all other nodes.
9594 If an instance in a cluster with nodes A..D was exported to node C, its
9595 exports will be removed from the nodes A, B and D.
9598 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9600 nodelist = self.cfg.GetNodeList()
9601 nodelist.remove(self.dst_node.name)
9603 # on one-node clusters nodelist will be empty after the removal;
9604 # if we proceeded, the backup would be removed because OpQueryExports
9605 # substitutes an empty list with the full cluster node list.
9606 iname = self.instance.name
9608 feedback_fn("Removing old exports for instance %s" % iname)
9609 exportlist = self.rpc.call_export_list(nodelist)
9610 for node in exportlist:
9611 if exportlist[node].fail_msg:
9613 if iname in exportlist[node].payload:
9614 msg = self.rpc.call_export_remove(node, iname).fail_msg
9616 self.LogWarning("Could not remove older export for instance %s"
9617 " on node %s: %s", iname, node, msg)
9619 def Exec(self, feedback_fn):
9620 """Export an instance to an image in the cluster.
9623 assert self.op.mode in constants.EXPORT_MODES
9625 instance = self.instance
9626 src_node = instance.primary_node
9628 if self.op.shutdown:
9629 # shutdown the instance, but not the disks
9630 feedback_fn("Shutting down instance %s" % instance.name)
9631 result = self.rpc.call_instance_shutdown(src_node, instance,
9632 self.op.shutdown_timeout)
9633 # TODO: Maybe ignore failures if ignore_remove_failures is set
9634 result.Raise("Could not shutdown instance %s on"
9635 " node %s" % (instance.name, src_node))
9637 # set the disk IDs correctly since call_instance_start needs the
9638 # correct drbd minor to create the symlinks
9639 for disk in instance.disks:
9640 self.cfg.SetDiskID(disk, src_node)
9642 activate_disks = (not instance.admin_up)
9645 # Activate the instance disks if we're exporting a stopped instance
9646 feedback_fn("Activating disks for %s" % instance.name)
9647 _StartInstanceDisks(self, instance, None)
9650 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9653 helper.CreateSnapshots()
9655 if (self.op.shutdown and instance.admin_up and
9656 not self.op.remove_instance):
9657 assert not activate_disks
9658 feedback_fn("Starting instance %s" % instance.name)
9659 result = self.rpc.call_instance_start(src_node, instance, None, None)
9660 msg = result.fail_msg
9662 feedback_fn("Failed to start instance: %s" % msg)
9663 _ShutdownInstanceDisks(self, instance)
9664 raise errors.OpExecError("Could not start instance: %s" % msg)
9666 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9667 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9668 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9669 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9670 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9672 (key_name, _, _) = self.x509_key_name
9675 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9678 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9679 key_name, dest_ca_pem,
9684 # Check for backwards compatibility
9685 assert len(dresults) == len(instance.disks)
9686 assert compat.all(isinstance(i, bool) for i in dresults), \
9687 "Not all results are boolean: %r" % dresults
9691 feedback_fn("Deactivating disks for %s" % instance.name)
9692 _ShutdownInstanceDisks(self, instance)
9694 if not (compat.all(dresults) and fin_resu):
9697 failures.append("export finalization")
9698 if not compat.all(dresults):
9699 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9701 failures.append("disk export: disk(s) %s" % fdsk)
9703 raise errors.OpExecError("Export failed, errors in %s" %
9704 utils.CommaJoin(failures))
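# Illustrative sketch (hypothetical values) of the failure handling above: a
# two-disk export where only the second disk failed would have
#
#   fin_resu == True
#   dresults == [True, False]
#
# yielding the error "Export failed, errors in disk export: disk(s) 1".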
9706 # At this point, the export was successful, we can cleanup/finish
9708 # Remove instance if requested
9709 if self.op.remove_instance:
9710 feedback_fn("Removing instance %s" % instance.name)
9711 _RemoveInstance(self, feedback_fn, instance,
9712 self.op.ignore_remove_failures)
9714 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9715 self._CleanupExports(feedback_fn)
9717 return fin_resu, dresults
9720 class LURemoveExport(NoHooksLU):
9721 """Remove exports related to the named instance.
9729 def ExpandNames(self):
9730 self.needed_locks = {}
9731 # We need all nodes to be locked in order for RemoveExport to work, but we
9732 # don't need to lock the instance itself, as nothing will happen to it (and
9733 # we can also remove exports for an already removed instance)
9734 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9736 def Exec(self, feedback_fn):
9737 """Remove any export.
9740 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9741 # If the instance was not found we'll try with the name that was passed in.
9742 # This will only work if it was an FQDN, though.
9744 if not instance_name:
9746 instance_name = self.op.instance_name
9748 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9749 exportlist = self.rpc.call_export_list(locked_nodes)
9751 for node in exportlist:
9752 msg = exportlist[node].fail_msg
9754 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9756 if instance_name in exportlist[node].payload:
9758 result = self.rpc.call_export_remove(node, instance_name)
9759 msg = result.fail_msg
9761 logging.error("Could not remove export for instance %s"
9762 " on node %s: %s", instance_name, node, msg)
9764 if fqdn_warn and not found:
9765 feedback_fn("Export not found. If trying to remove an export belonging"
9766 " to a deleted instance please use its Fully Qualified"
9770 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9773 This is an abstract class which is the parent of all the other tags LUs.
9777 def ExpandNames(self):
9778 self.needed_locks = {}
9779 if self.op.kind == constants.TAG_NODE:
9780 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9781 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9782 elif self.op.kind == constants.TAG_INSTANCE:
9783 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9784 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9786 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
9787 # not possible to acquire the BGL based on opcode parameters)
9789 def CheckPrereq(self):
9790 """Check prerequisites.
9793 if self.op.kind == constants.TAG_CLUSTER:
9794 self.target = self.cfg.GetClusterInfo()
9795 elif self.op.kind == constants.TAG_NODE:
9796 self.target = self.cfg.GetNodeInfo(self.op.name)
9797 elif self.op.kind == constants.TAG_INSTANCE:
9798 self.target = self.cfg.GetInstanceInfo(self.op.name)
9800 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9801 str(self.op.kind), errors.ECODE_INVAL)
9804 class LUGetTags(TagsLU):
9805 """Returns the tags of a given object.
9809 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9810 # Name is only meaningful for nodes and instances
9811 ("name", _NoDefault, _TMaybeString),
9815 def ExpandNames(self):
9816 TagsLU.ExpandNames(self)
9818 # Share locks as this is only a read operation
9819 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9821 def Exec(self, feedback_fn):
9822 """Returns the tag list.
9825 return list(self.target.GetTags())
9828 class LUSearchTags(NoHooksLU):
9829 """Searches the tags for a given pattern.
9833 ("pattern", _NoDefault, _TNonEmptyString),
9837 def ExpandNames(self):
9838 self.needed_locks = {}
9840 def CheckPrereq(self):
9841 """Check prerequisites.
9843 This checks the pattern passed for validity by compiling it.
9847 self.re = re.compile(self.op.pattern)
9848 except re.error, err:
9849 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9850 (self.op.pattern, err), errors.ECODE_INVAL)
9852 def Exec(self, feedback_fn):
9853 """Returns the tag list.
9857 tgts = [("/cluster", cfg.GetClusterInfo())]
9858 ilist = cfg.GetAllInstancesInfo().values()
9859 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9860 nlist = cfg.GetAllNodesInfo().values()
9861 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9863 for path, target in tgts:
9864 for tag in target.GetTags():
9865 if self.re.search(tag):
9866 results.append((path, tag))
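# Illustrative sketch (hypothetical tags): a search for the pattern "^db"
# could return something like
#
#   [("/instances/inst1.example.com", "db:primary"),
#    ("/nodes/node2.example.com", "dbhost")]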
9870 class LUAddTags(TagsLU):
9871 """Sets a tag on a given object.
9875 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9876 # Name is only meaningful for nodes and instances
9877 ("name", _NoDefault, _TMaybeString),
9878 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9882 def CheckPrereq(self):
9883 """Check prerequisites.
9885 This checks the type and length of the tag name and value.
9888 TagsLU.CheckPrereq(self)
9889 for tag in self.op.tags:
9890 objects.TaggableObject.ValidateTag(tag)
9892 def Exec(self, feedback_fn):
9897 for tag in self.op.tags:
9898 self.target.AddTag(tag)
9899 except errors.TagError, err:
9900 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9901 self.cfg.Update(self.target, feedback_fn)
9904 class LUDelTags(TagsLU):
9905 """Delete a list of tags from a given object.
9909 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9910 # Name is only meaningful for nodes and instances
9911 ("name", _NoDefault, _TMaybeString),
9912 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9916 def CheckPrereq(self):
9917 """Check prerequisites.
9919 This checks that we have the given tag.
9922 TagsLU.CheckPrereq(self)
9923 for tag in self.op.tags:
9924 objects.TaggableObject.ValidateTag(tag)
9925 del_tags = frozenset(self.op.tags)
9926 cur_tags = self.target.GetTags()
9928 diff_tags = del_tags - cur_tags
9930 diff_names = ("'%s'" % i for i in sorted(diff_tags))
9931 raise errors.OpPrereqError("Tag(s) %s not found" %
9932 (utils.CommaJoin(diff_names), ),
9935 def Exec(self, feedback_fn):
9936 """Remove the tag from the object.
9939 for tag in self.op.tags:
9940 self.target.RemoveTag(tag)
9941 self.cfg.Update(self.target, feedback_fn)
9944 class LUTestDelay(NoHooksLU):
9945 """Sleep for a specified amount of time.
9947 This LU sleeps on the master and/or nodes for a specified amount of
9952 ("duration", _NoDefault, _TFloat),
9953 ("on_master", True, _TBool),
9954 ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9955 ("repeat", 0, _TPositiveInt)
9959 def ExpandNames(self):
9960 """Expand names and set required locks.
9962 This expands the node list, if any.
9965 self.needed_locks = {}
9966 if self.op.on_nodes:
9967 # _GetWantedNodes can be used here, but is not always appropriate to use
9968 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9970 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9971 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9973 def _TestDelay(self):
9974 """Do the actual sleep.
9977 if self.op.on_master:
9978 if not utils.TestDelay(self.op.duration):
9979 raise errors.OpExecError("Error during master delay test")
9980 if self.op.on_nodes:
9981 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9982 for node, node_result in result.items():
9983 node_result.Raise("Failure during rpc call to node %s" % node)
9985 def Exec(self, feedback_fn):
9986 """Execute the test delay opcode, with the wanted repetitions.
9989 if self.op.repeat == 0:
9992 top_value = self.op.repeat - 1
9993 for i in range(self.op.repeat):
9994 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9998 class LUTestJobqueue(NoHooksLU):
9999 """Utility LU to test some aspects of the job queue.
10003 ("notify_waitlock", False, _TBool),
10004 ("notify_exec", False, _TBool),
10005 ("log_messages", _EmptyList, _TListOf(_TString)),
10006 ("fail", False, _TBool),
10010 # Must be lower than default timeout for WaitForJobChange to see whether it
10011 # notices changed jobs
10012 _CLIENT_CONNECT_TIMEOUT = 20.0
10013 _CLIENT_CONFIRM_TIMEOUT = 60.0
10016 def _NotifyUsingSocket(cls, cb, errcls):
10017 """Opens a Unix socket and waits for another program to connect.
10020 @param cb: Callback to send socket name to client
10021 @type errcls: class
10022 @param errcls: Exception class to use for errors
10025 # Using a temporary directory as there's no easy way to create temporary
10026 # sockets without writing a custom loop around tempfile.mktemp and
10028 tmpdir = tempfile.mkdtemp()
10030 tmpsock = utils.PathJoin(tmpdir, "sock")
10032 logging.debug("Creating temporary socket at %s", tmpsock)
10033 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10038 # Send details to client
10041 # Wait for client to connect before continuing
10042 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10044 (conn, _) = sock.accept()
10045 except socket.error, err:
10046 raise errcls("Client didn't connect in time (%s)" % err)
10050 # Remove as soon as client is connected
10051 shutil.rmtree(tmpdir)
10053 # Wait for client to close
10056 # pylint: disable-msg=E1101
10057 # Instance of '_socketobject' has no ... member
10058 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10060 except socket.error, err:
10061 raise errcls("Client failed to confirm notification (%s)" % err)
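# A minimal client-side sketch (hypothetical, test helper only): the peer
# that receives the socket path through the callback acknowledges roughly as
#
#   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   client.connect(sockname)
#   client.close()          # lets the confirmation wait above complete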
10065 def _SendNotification(self, test, arg, sockname):
10066 """Sends a notification to the client.
10069 @param test: Test name
10070 @param arg: Test argument (depends on test)
10071 @type sockname: string
10072 @param sockname: Socket path
10075 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10077 def _Notify(self, prereq, test, arg):
10078 """Notifies the client of a test.
10081 @param prereq: Whether this is a prereq-phase test
10083 @param test: Test name
10084 @param arg: Test argument (depends on test)
10088 errcls = errors.OpPrereqError
10090 errcls = errors.OpExecError
10092 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10096 def CheckArguments(self):
10097 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10098 self.expandnames_calls = 0
10100 def ExpandNames(self):
10101 checkargs_calls = getattr(self, "checkargs_calls", 0)
10102 if checkargs_calls < 1:
10103 raise errors.ProgrammerError("CheckArguments was not called")
10105 self.expandnames_calls += 1
10107 if self.op.notify_waitlock:
10108 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10110 self.LogInfo("Expanding names")
10112 # Get lock on master node (just to get a lock, not for a particular reason)
10113 self.needed_locks = {
10114 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10117 def Exec(self, feedback_fn):
10118 if self.expandnames_calls < 1:
10119 raise errors.ProgrammerError("ExpandNames was not called")
10121 if self.op.notify_exec:
10122 self._Notify(False, constants.JQT_EXEC, None)
10124 self.LogInfo("Executing")
10126 if self.op.log_messages:
10127 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10128 for idx, msg in enumerate(self.op.log_messages):
10129 self.LogInfo("Sending log message %s", idx + 1)
10130 feedback_fn(constants.JQT_MSGPREFIX + msg)
10131 # Report how many test messages have been sent
10132 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10135 raise errors.OpExecError("Opcode failure was requested")
10140 class IAllocator(object):
10141 """IAllocator framework.
10143 An IAllocator instance has four sets of attributes:
10144 - cfg that is needed to query the cluster
10145 - input data (all members of the _KEYS class attribute are required)
10146 - four buffer attributes (in|out_data|text), which represent the
10147 input (to the external script) in text and data structure format,
10148 and the output from it, again in two formats
10149 - the result variables from the script (success, info, nodes) for
10153 # pylint: disable-msg=R0902
10154 # lots of instance attributes
10156 "name", "mem_size", "disks", "disk_template",
10157 "os", "tags", "nics", "vcpus", "hypervisor",
10160 "name", "relocate_from",
10166 def __init__(self, cfg, rpc, mode, **kwargs):
10169 # init buffer variables
10170 self.in_text = self.out_text = self.in_data = self.out_data = None
10171 # init all input fields so that pylint is happy
10173 self.mem_size = self.disks = self.disk_template = None
10174 self.os = self.tags = self.nics = self.vcpus = None
10175 self.hypervisor = None
10176 self.relocate_from = None
10178 self.evac_nodes = None
10180 self.required_nodes = None
10181 # init result fields
10182 self.success = self.info = self.result = None
10183 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10184 keyset = self._ALLO_KEYS
10185 fn = self._AddNewInstance
10186 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10187 keyset = self._RELO_KEYS
10188 fn = self._AddRelocateInstance
10189 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10190 keyset = self._EVAC_KEYS
10191 fn = self._AddEvacuateNodes
10193 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10194 " IAllocator" % self.mode)
10196 if key not in keyset:
10197 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10198 " IAllocator" % key)
10199 setattr(self, key, kwargs[key])
10202 if key not in kwargs:
10203 raise errors.ProgrammerError("Missing input parameter '%s' to"
10204 " IAllocator" % key)
10205 self._BuildInputData(fn)
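# A minimal sketch (hypothetical values) of how an allocation request is
# typically constructed; LUTestAllocator below builds one the same way:
#
#   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
#                    name="inst1.example.com", mem_size=1024,
#                    disks=[{"size": 1024, "mode": "w"}],
#                    disk_template=constants.DT_DRBD8, os="debian-image",
#                    tags=[], nics=[{"mac": None, "ip": None,
#                                    "bridge": None}],
#                    vcpus=1, hypervisor=constants.HT_XEN_PVM)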
10207 def _ComputeClusterData(self):
10208 """Compute the generic allocator input data.
10210 This is the data that is independent of the actual operation.
10214 cluster_info = cfg.GetClusterInfo()
10217 "version": constants.IALLOCATOR_VERSION,
10218 "cluster_name": cfg.GetClusterName(),
10219 "cluster_tags": list(cluster_info.GetTags()),
10220 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10221 # we don't have job IDs
10223 iinfo = cfg.GetAllInstancesInfo().values()
10224 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10228 node_list = cfg.GetNodeList()
10230 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10231 hypervisor_name = self.hypervisor
10232 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10233 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10234 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10235 hypervisor_name = cluster_info.enabled_hypervisors[0]
10237 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10240 self.rpc.call_all_instances_info(node_list,
10241 cluster_info.enabled_hypervisors)
10242 for nname, nresult in node_data.items():
10243 # first fill in static (config-based) values
10244 ninfo = cfg.GetNodeInfo(nname)
10246 "tags": list(ninfo.GetTags()),
10247 "primary_ip": ninfo.primary_ip,
10248 "secondary_ip": ninfo.secondary_ip,
10249 "offline": ninfo.offline,
10250 "drained": ninfo.drained,
10251 "master_candidate": ninfo.master_candidate,
10254 if not (ninfo.offline or ninfo.drained):
10255 nresult.Raise("Can't get data for node %s" % nname)
10256 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10258 remote_info = nresult.payload
10260 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10261 'vg_size', 'vg_free', 'cpu_total']:
10262 if attr not in remote_info:
10263 raise errors.OpExecError("Node '%s' didn't return attribute"
10264 " '%s'" % (nname, attr))
10265 if not isinstance(remote_info[attr], int):
10266 raise errors.OpExecError("Node '%s' returned invalid value"
10268 (nname, attr, remote_info[attr]))
10269 # compute memory used by primary instances
10270 i_p_mem = i_p_up_mem = 0
10271 for iinfo, beinfo in i_list:
10272 if iinfo.primary_node == nname:
10273 i_p_mem += beinfo[constants.BE_MEMORY]
10274 if iinfo.name not in node_iinfo[nname].payload:
10277 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10278 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10279 remote_info['memory_free'] -= max(0, i_mem_diff)
10282 i_p_up_mem += beinfo[constants.BE_MEMORY]
10284 # compute memory used by instances
10286 "total_memory": remote_info['memory_total'],
10287 "reserved_memory": remote_info['memory_dom0'],
10288 "free_memory": remote_info['memory_free'],
10289 "total_disk": remote_info['vg_size'],
10290 "free_disk": remote_info['vg_free'],
10291 "total_cpus": remote_info['cpu_total'],
10292 "i_pri_memory": i_p_mem,
10293 "i_pri_up_memory": i_p_up_mem,
10295 pnr.update(pnr_dyn)
10297 node_results[nname] = pnr
10298 data["nodes"] = node_results
10302 for iinfo, beinfo in i_list:
10304 for nic in iinfo.nics:
10305 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10306 nic_dict = {"mac": nic.mac,
10308 "mode": filled_params[constants.NIC_MODE],
10309 "link": filled_params[constants.NIC_LINK],
10311 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10312 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10313 nic_data.append(nic_dict)
10315 "tags": list(iinfo.GetTags()),
10316 "admin_up": iinfo.admin_up,
10317 "vcpus": beinfo[constants.BE_VCPUS],
10318 "memory": beinfo[constants.BE_MEMORY],
10320 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10322 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10323 "disk_template": iinfo.disk_template,
10324 "hypervisor": iinfo.hypervisor,
10326 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10328 instance_data[iinfo.name] = pir
10330 data["instances"] = instance_data
10332 self.in_data = data
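# Illustrative sketch (hypothetical values) of the structure assembled above;
# only a few representative keys are shown:
#
#   self.in_data == {
#     "version": constants.IALLOCATOR_VERSION,
#     "cluster_name": "cluster.example.com",
#     "cluster_tags": [],
#     "enabled_hypervisors": ["xen-pvm"],
#     "nodes": {"node1.example.com": {"total_memory": 4096, ...}},
#     "instances": {"inst1.example.com": {"vcpus": 1, ...}},
#   }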
10334 def _AddNewInstance(self):
10335 """Add new instance data to allocator structure.
10337 This in combination with _ComputeClusterData will create the
10338 correct structure needed as input for the allocator.
10340 The checks for the completeness of the opcode must have already been
10344 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10346 if self.disk_template in constants.DTS_NET_MIRROR:
10347 self.required_nodes = 2
10349 self.required_nodes = 1
10352 "disk_template": self.disk_template,
10355 "vcpus": self.vcpus,
10356 "memory": self.mem_size,
10357 "disks": self.disks,
10358 "disk_space_total": disk_space,
10360 "required_nodes": self.required_nodes,
10364 def _AddRelocateInstance(self):
10365 """Add relocate instance data to allocator structure.
10367 This in combination with _ComputeClusterData will create the
10368 correct structure needed as input for the allocator.
10370 The checks for the completeness of the opcode must have already been
10374 instance = self.cfg.GetInstanceInfo(self.name)
10375 if instance is None:
10376 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10377 " IAllocator" % self.name)
10379 if instance.disk_template not in constants.DTS_NET_MIRROR:
10380 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10381 errors.ECODE_INVAL)
10383 if len(instance.secondary_nodes) != 1:
10384 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
10385 errors.ECODE_STATE)
10387 self.required_nodes = 1
10388 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10389 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10393 "disk_space_total": disk_space,
10394 "required_nodes": self.required_nodes,
10395 "relocate_from": self.relocate_from,
10399 def _AddEvacuateNodes(self):
10400 """Add evacuate nodes data to allocator structure.
10404 "evac_nodes": self.evac_nodes
10408 def _BuildInputData(self, fn):
10409 """Build input data structures.
10412 self._ComputeClusterData()
10415 request["type"] = self.mode
10416 self.in_data["request"] = request
10418 self.in_text = serializer.Dump(self.in_data)
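# Illustrative sketch (hypothetical node name): for a node evacuation the
# serialized text contains a request section along the lines of
#
#   "request": {"type": "multi-evacuate",        # IALLOCATOR_MODE_MEVAC
#               "evac_nodes": ["node2.example.com"]}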
10420 def Run(self, name, validate=True, call_fn=None):
10421 """Run an instance allocator and return the results.
10424 if call_fn is None:
10425 call_fn = self.rpc.call_iallocator_runner
10427 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10428 result.Raise("Failure while running the iallocator script")
10430 self.out_text = result.payload
10432 self._ValidateResult()
10434 def _ValidateResult(self):
10435 """Process the allocator results.
10437 This will process and if successful save the result in
10438 self.out_data and the other parameters.
10442 rdict = serializer.Load(self.out_text)
10443 except Exception, err:
10444 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10446 if not isinstance(rdict, dict):
10447 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10449 # TODO: remove backwards compatibility in later versions
10450 if "nodes" in rdict and "result" not in rdict:
10451 rdict["result"] = rdict["nodes"]
10454 for key in "success", "info", "result":
10455 if key not in rdict:
10456 raise errors.OpExecError("Can't parse iallocator results:"
10457 " missing key '%s'" % key)
10458 setattr(self, key, rdict[key])
10460 if not isinstance(rdict["result"], list):
10461 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10463 self.out_data = rdict
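# Illustrative sketch (hypothetical names) of a well-formed reply that passes
# the checks above for an allocation request:
#
#   {"success": True,
#    "info": "allocation successful",
#    "result": ["node2.example.com", "node3.example.com"]}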
10466 class LUTestAllocator(NoHooksLU):
10467 """Run allocator tests.
10469 This LU runs the allocator tests
10473 ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10474 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10475 ("name", _NoDefault, _TNonEmptyString),
10476 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10477 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10478 _TOr(_TNone, _TNonEmptyString))))),
10479 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10480 ("hypervisor", None, _TMaybeString),
10481 ("allocator", None, _TMaybeString),
10482 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10483 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10484 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10485 ("os", None, _TMaybeString),
10486 ("disk_template", None, _TMaybeString),
10487 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10490 def CheckPrereq(self):
10491 """Check prerequisites.
10493 This checks the opcode parameters depending on the direction and mode of the test.
10496 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10497 for attr in ["mem_size", "disks", "disk_template",
10498 "os", "tags", "nics", "vcpus"]:
10499 if not hasattr(self.op, attr):
10500 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10501 attr, errors.ECODE_INVAL)
10502 iname = self.cfg.ExpandInstanceName(self.op.name)
10503 if iname is not None:
10504 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10505 iname, errors.ECODE_EXISTS)
10506 if not isinstance(self.op.nics, list):
10507 raise errors.OpPrereqError("Invalid parameter 'nics'",
10508 errors.ECODE_INVAL)
10509 if not isinstance(self.op.disks, list):
10510 raise errors.OpPrereqError("Invalid parameter 'disks'",
10511 errors.ECODE_INVAL)
10512 for row in self.op.disks:
10513 if (not isinstance(row, dict) or
10514 "size" not in row or
10515 not isinstance(row["size"], int) or
10516 "mode" not in row or
10517 row["mode"] not in ['r', 'w']):
10518 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10519 " parameter", errors.ECODE_INVAL)
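# Illustrative sketch of a 'disks' value accepted by the check above
# (hypothetical sizes, in MiB):
#
#   [{"size": 1024, "mode": "w"}, {"size": 512, "mode": "r"}]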
10520 if self.op.hypervisor is None:
10521 self.op.hypervisor = self.cfg.GetHypervisorType()
10522 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10523 fname = _ExpandInstanceName(self.cfg, self.op.name)
10524 self.op.name = fname
10525 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10526 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10527 if not hasattr(self.op, "evac_nodes"):
10528 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10529 " opcode input", errors.ECODE_INVAL)
10531 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10532 self.op.mode, errors.ECODE_INVAL)
10534 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10535 if self.op.allocator is None:
10536 raise errors.OpPrereqError("Missing allocator name",
10537 errors.ECODE_INVAL)
10538 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10539 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10540 self.op.direction, errors.ECODE_INVAL)
10542 def Exec(self, feedback_fn):
10543 """Run the allocator test.
10546 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10547 ial = IAllocator(self.cfg, self.rpc,
10550 mem_size=self.op.mem_size,
10551 disks=self.op.disks,
10552 disk_template=self.op.disk_template,
10556 vcpus=self.op.vcpus,
10557 hypervisor=self.op.hypervisor,
10559 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10560 ial = IAllocator(self.cfg, self.rpc,
10563 relocate_from=list(self.relocate_from),
10565 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10566 ial = IAllocator(self.cfg, self.rpc,
10568 evac_nodes=self.op.evac_nodes)
10570 raise errors.ProgrammerError("Unhandled mode %s in"
10571 " LUTestAllocator.Exec", self.op.mode)
10573 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10574 result = ial.in_text
10576 ial.Run(self.op.allocator, validate=False)
10577 result = ial.out_text