4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have way too many lines in this module
43 from ganeti import ssh
44 from ganeti import utils
45 from ganeti import errors
46 from ganeti import hypervisor
47 from ganeti import locking
48 from ganeti import constants
49 from ganeti import objects
50 from ganeti import serializer
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
57 import ganeti.masterd.instance # pylint: disable-msg=W0611
60 # Modifiable default values; need to define these here before the
64 """Returns an empty list.
71 """Returns an empty dict.
77 #: The without-default default value
81 #: The no-type (value too complex to check it in the type system)
87 """Checks if the given value is not None.
90 return val is not None
94 """Checks if the given value is None.
101 """Checks if the given value is a boolean.
104 return isinstance(val, bool)
108 """Checks if the given value is an integer.
111 return isinstance(val, int)
115 """Checks if the given value is a float.
118 return isinstance(val, float)
122 """Checks if the given value is a string.
125 return isinstance(val, basestring)
129 """Checks if a given value evaluates to a boolean True value.
135 def _TElemOf(target_list):
136 """Builds a function that checks if a given value is a member of a list.
139 return lambda val: val in target_list
144 """Checks if the given value is a list.
147 return isinstance(val, list)
151 """Checks if the given value is a dictionary.
154 return isinstance(val, dict)
157 def _TIsLength(size):
158 """Check is the given container is of the given size.
161 return lambda container: len(container) == size
166 """Combine multiple functions using an AND operation.
170 return compat.all(t(val) for t in args)
175 """Combine multiple functions using an AND operation.
179 return compat.any(t(val) for t in args)
184 """Checks that a modified version of the argument passes the given test.
187 return lambda val: test(fn(val))
192 #: a non-empty string
193 _TNonEmptyString = _TAnd(_TString, _TTrue)
196 #: a maybe non-empty string
197 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
200 #: a maybe boolean (bool or none)
201 _TMaybeBool = _TOr(_TBool, _TNone)
204 #: a positive integer
205 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
207 #: a strictly positive integer
208 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
211 def _TListOf(my_type):
212 """Checks if a given value is a list with all elements of the same type.
216 lambda lst: compat.all(my_type(v) for v in lst))
219 def _TDictOf(key_type, val_type):
220 """Checks a dict type for the type of its key/values.
224 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
225 and compat.all(val_type(v)
226 for v in my_dict.values())))
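# Illustrative examples of composing the checks above (shown as comments,
# with hypothetical values, so nothing runs at import time):
#   _TListOf(_TNonEmptyString)(["node1", "node2"])          -> True
#   _TListOf(_TNonEmptyString)(["node1", ""])                -> False
#   _TDictOf(_TNonEmptyString, _TPositiveInt)({"count": 3})  -> True
#   _TMaybeString(None)                                      -> True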
229 # Common opcode attributes
231 #: output fields for a query operation
232 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
235 #: the shutdown timeout
236 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
239 #: the force parameter
240 _PForce = ("force", False, _TBool)
242 #: a required instance name (for single-instance LUs)
243 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
246 #: a required node name (for single-node LUs)
247 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
249 #: the migration type (live/non-live)
250 _PMigrationMode = ("mode", None, _TOr(_TNone,
251 _TElemOf(constants.HT_MIGRATION_MODES)))
253 #: the obsolete 'live' mode (boolean)
254 _PMigrationLive = ("live", None, _TMaybeBool)
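# Illustrative sketch (the extra parameter below is hypothetical): an LU
# declares its parameters as (name, default, type-check) tuples in its
# _OP_PARAMS class attribute, combining the common definitions above with
# LU-specific ones, e.g.:
#   _OP_PARAMS = [
#     _PInstanceName,
#     ("ignore_failures", False, _TBool),
#   ]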
258 class LogicalUnit(object):
259 """Logical Unit base class.
261 Subclasses must follow these rules:
262 - implement ExpandNames
263 - implement CheckPrereq (except when tasklets are used)
264 - implement Exec (except when tasklets are used)
265 - implement BuildHooksEnv
266 - redefine HPATH and HTYPE
267 - optionally redefine their run requirements:
268 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
270 Note that all commands require root permissions.
272 @ivar dry_run_result: the value (if any) that will be returned to the caller
273 in dry-run mode (signalled by opcode dry_run parameter)
274 @cvar _OP_PARAMS: a list of opcode attributes, the default values
275 they should get if not already defined, and the types they must match
283 def __init__(self, processor, op, context, rpc):
284 """Constructor for LogicalUnit.
286 This needs to be overridden in derived classes in order to check op
290 self.proc = processor
292 self.cfg = context.cfg
293 self.context = context
295 # Dicts used to declare locking needs to mcpu
296 self.needed_locks = None
297 self.acquired_locks = {}
298 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
300 self.remove_locks = {}
301 # Used to force good behavior when calling helper functions
302 self.recalculate_locks = {}
305 self.Log = processor.Log # pylint: disable-msg=C0103
306 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
307 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
308 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
309 # support for dry-run
310 self.dry_run_result = None
311 # support for generic debug attribute
312 if (not hasattr(self.op, "debug_level") or
313 not isinstance(self.op.debug_level, int)):
314 self.op.debug_level = 0
319 # The new kind-of-type-system
320 op_id = self.op.OP_ID
321 for attr_name, aval, test in self._OP_PARAMS:
322 if not hasattr(op, attr_name):
323 if aval == _NoDefault:
324 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
325 (op_id, attr_name), errors.ECODE_INVAL)
331 setattr(self.op, attr_name, dval)
332 attr_val = getattr(op, attr_name)
336 if not callable(test):
337 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
338 " given type is not a proper type (%s)" %
339 (op_id, attr_name, test))
340 if not test(attr_val):
341 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
342 self.op.OP_ID, attr_name, type(attr_val), attr_val)
343 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
344 (op_id, attr_name), errors.ECODE_INVAL)
346 self.CheckArguments()
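# Illustrative failure modes of the parameter check above (the opcode id
# "OP_EXAMPLE" is hypothetical): a missing required parameter raises
#   OpPrereqError("Required parameter 'OP_EXAMPLE.instance_name' missing")
# while a parameter failing its type check raises
#   OpPrereqError("Parameter 'OP_EXAMPLE.instance_name' fails validation")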
349 """Returns the SshRunner object
353 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
356 ssh = property(fget=__GetSSH)
358 def CheckArguments(self):
359 """Check syntactic validity for the opcode arguments.
361 This method is for doing a simple syntactic check and ensuring the
362 validity of opcode parameters, without any cluster-related
363 checks. While the same can be accomplished in ExpandNames and/or
364 CheckPrereq, doing these separately is better because:
366 - ExpandNames is left as purely a lock-related function
367 - CheckPrereq is run after we have acquired locks (and possible
370 The function is allowed to change the self.op attribute so that
371 later methods can no longer worry about missing parameters.
376 def ExpandNames(self):
377 """Expand names for this LU.
379 This method is called before starting to execute the opcode, and it should
380 update all the parameters of the opcode to their canonical form (e.g. a
381 short node name must be fully expanded after this method has successfully
382 completed). This way locking, hooks, logging, etc. can work correctly.
384 LUs which implement this method must also populate the self.needed_locks
385 member, as a dict with lock levels as keys, and a list of needed lock names
388 - use an empty dict if you don't need any lock
389 - if you don't need any lock at a particular level omit that level
390 - don't put anything for the BGL level
391 - if you want all locks at a level use locking.ALL_SET as a value
393 If you need to share locks (rather than acquire them exclusively) at one
394 level you can modify self.share_locks, setting a true value (usually 1) for
395 that level. By default locks are not shared.
397 This function can also define a list of tasklets, which then will be
398 executed in order instead of the usual LU-level CheckPrereq and Exec
399 functions, if those are not defined by the LU.
403 # Acquire all nodes and one instance
404 self.needed_locks = {
405 locking.LEVEL_NODE: locking.ALL_SET,
406 locking.LEVEL_INSTANCE: ['instance1.example.com'],
408 # Acquire just two nodes
409 self.needed_locks = {
410 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
413 self.needed_locks = {} # No, you can't leave it to the default value None
416 # The implementation of this method is mandatory only if the new LU is
417 # concurrent, so that old LUs don't need to be changed all at the same time.
420 self.needed_locks = {} # Exclusive LUs don't need locks.
422 raise NotImplementedError
424 def DeclareLocks(self, level):
425 """Declare LU locking needs for a level
427 While most LUs can just declare their locking needs at ExpandNames time,
428 sometimes there's the need to calculate some locks after having acquired
429 the ones before. This function is called just before acquiring locks at a
430 particular level, but after acquiring the ones at lower levels, and permits
431 such calculations. It can be used to modify self.needed_locks, and by
432 default it does nothing.
434 This function is only called if you have something already set in
435 self.needed_locks for the level.
437 @param level: Locking level which is going to be locked
438 @type level: member of ganeti.locking.LEVELS
442 def CheckPrereq(self):
443 """Check prerequisites for this LU.
445 This method should check that the prerequisites for the execution
446 of this LU are fulfilled. It can do internode communication, but
447 it should be idempotent - no cluster or system changes are allowed.
450 The method should raise errors.OpPrereqError in case something is
451 not fulfilled. Its return value is ignored.
453 This method should also update all the parameters of the opcode to
454 their canonical form if it hasn't been done by ExpandNames before.
457 if self.tasklets is not None:
458 for (idx, tl) in enumerate(self.tasklets):
459 logging.debug("Checking prerequisites for tasklet %s/%s",
460 idx + 1, len(self.tasklets))
465 def Exec(self, feedback_fn):
468 This method should implement the actual work. It should raise
469 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
473 if self.tasklets is not None:
474 for (idx, tl) in enumerate(self.tasklets):
475 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
478 raise NotImplementedError
480 def BuildHooksEnv(self):
481 """Build hooks environment for this LU.
483 This method should return a three-element tuple consisting of: a dict
484 containing the environment that will be used for running the
485 specific hook for this LU, a list of node names on which the hook
486 should run before the execution, and a list of node names on which
487 the hook should run after the execution.
489 The keys of the dict must not have 'GANETI_' prefixed as this will
490 be handled in the hooks runner. Also note additional keys will be
491 added by the hooks runner. If the LU doesn't define any
492 environment, an empty dict (and not None) should be returned.
494 No nodes should be returned as an empty list (and not None).
496 Note that if the HPATH for a LU class is None, this function will not be called.
500 raise NotImplementedError
502 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
503 """Notify the LU about the results of its hooks.
505 This method is called every time a hooks phase is executed, and notifies
506 the Logical Unit about the hooks' result. The LU can then use it to alter
507 its result based on the hooks. By default the method does nothing and the
508 previous result is passed back unchanged but any LU can define it if it
509 wants to use the local cluster hook-scripts somehow.
511 @param phase: one of L{constants.HOOKS_PHASE_POST} or
512 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
513 @param hook_results: the results of the multi-node hooks rpc call
514 @param feedback_fn: function used to send feedback back to the caller
515 @param lu_result: the previous Exec result this LU had, or None
517 @return: the new Exec result, based on the previous result
521 # API must be kept, thus we ignore the 'unused argument' and 'could
522 # be a function' pylint warnings
523 # pylint: disable-msg=W0613,R0201
526 def _ExpandAndLockInstance(self):
527 """Helper function to expand and lock an instance.
529 Many LUs that work on an instance take its name in self.op.instance_name
530 and need to expand it and then declare the expanded name for locking. This
531 function does it, and then updates self.op.instance_name to the expanded
532 name. It also initializes needed_locks as a dict, if this hasn't been done
536 if self.needed_locks is None:
537 self.needed_locks = {}
539 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
540 "_ExpandAndLockInstance called with instance-level locks set"
541 self.op.instance_name = _ExpandInstanceName(self.cfg,
542 self.op.instance_name)
543 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
545 def _LockInstancesNodes(self, primary_only=False):
546 """Helper function to declare instances' nodes for locking.
548 This function should be called after locking one or more instances to lock
549 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
550 with all primary or secondary nodes for instances already locked and
551 present in self.needed_locks[locking.LEVEL_INSTANCE].
553 It should be called from DeclareLocks, and for safety only works if
554 self.recalculate_locks[locking.LEVEL_NODE] is set.
556 In the future it may grow parameters to just lock some instance's nodes, or
557 to just lock primaries or secondary nodes, if needed.
559 It should be called in DeclareLocks in a way similar to::
561 if level == locking.LEVEL_NODE:
562 self._LockInstancesNodes()
564 @type primary_only: boolean
565 @param primary_only: only lock primary nodes of locked instances
568 assert locking.LEVEL_NODE in self.recalculate_locks, \
569 "_LockInstancesNodes helper function called with no nodes to recalculate"
571 # TODO: check if we really have been called with the instance locks held
573 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
574 # future we might want to have different behaviors depending on the value
575 # of self.recalculate_locks[locking.LEVEL_NODE]
577 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
578 instance = self.context.cfg.GetInstanceInfo(instance_name)
579 wanted_nodes.append(instance.primary_node)
581 wanted_nodes.extend(instance.secondary_nodes)
583 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
584 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
585 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
586 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
588 del self.recalculate_locks[locking.LEVEL_NODE]
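# Illustrative usage sketch (mirrors the docstring above): an LU that locked
# instances in ExpandNames and set
#   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# would then typically declare its node locks as:
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()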
591 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
592 """Simple LU which runs no hooks.
594 This LU is intended as a parent for other LogicalUnits which will
595 run no hooks, in order to reduce duplicate code.
601 def BuildHooksEnv(self):
602 """Empty BuildHooksEnv for NoHooksLu.
604 This just raises an error.
607 assert False, "BuildHooksEnv called for NoHooksLUs"
611 """Tasklet base class.
613 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
614 they can mix legacy code with tasklets. Locking needs to be done in the LU,
615 tasklets know nothing about locks.
617 Subclasses must follow these rules:
618 - Implement CheckPrereq
622 def __init__(self, lu):
629 def CheckPrereq(self):
630 """Check prerequisites for this tasklets.
632 This method should check whether the prerequisites for the execution of
633 this tasklet are fulfilled. It can do internode communication, but it
634 should be idempotent - no cluster or system changes are allowed.
636 The method should raise errors.OpPrereqError in case something is not
637 fulfilled. Its return value is ignored.
639 This method should also update all parameters to their canonical form if it
640 hasn't been done before.
645 def Exec(self, feedback_fn):
646 """Execute the tasklet.
648 This method should implement the actual work. It should raise
649 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
653 raise NotImplementedError
656 def _GetWantedNodes(lu, nodes):
657 """Returns list of checked and expanded node names.
659 @type lu: L{LogicalUnit}
660 @param lu: the logical unit on whose behalf we execute
662 @param nodes: list of node names or None for all nodes
664 @return: the list of nodes, sorted
665 @raise errors.ProgrammerError: if the nodes parameter is wrong type
669 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
670 " non-empty list of nodes whose name is to be expanded.")
672 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
673 return utils.NiceSort(wanted)
676 def _GetWantedInstances(lu, instances):
677 """Returns list of checked and expanded instance names.
679 @type lu: L{LogicalUnit}
680 @param lu: the logical unit on whose behalf we execute
681 @type instances: list
682 @param instances: list of instance names or None for all instances
684 @return: the list of instances, sorted
685 @raise errors.OpPrereqError: if the instances parameter is wrong type
686 @raise errors.OpPrereqError: if any of the passed instances is not found
690 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
692 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
696 def _GetUpdatedParams(old_params, update_dict,
697 use_default=True, use_none=False):
698 """Return the new version of a parameter dictionary.
700 @type old_params: dict
701 @param old_params: old parameters
702 @type update_dict: dict
703 @param update_dict: dict containing new parameter values, or
704 constants.VALUE_DEFAULT to reset the parameter to its default
706 @type use_default: boolean
707 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
708 values as 'to be deleted' values
709 @type use_none: boolean
710 @param use_none: whether to recognise C{None} values as 'to be deleted' values
713 @return: the new parameter dictionary
716 params_copy = copy.deepcopy(old_params)
717 for key, val in update_dict.iteritems():
718 if ((use_default and val == constants.VALUE_DEFAULT) or
719 (use_none and val is None)):
725 params_copy[key] = val
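# Illustrative example (hypothetical values), using the defaults
# use_default=True and use_none=False:
#   _GetUpdatedParams({"memory": 128, "vcpus": 1},
#                     {"memory": constants.VALUE_DEFAULT, "vcpus": 2})
# returns {"vcpus": 2}: "memory" is removed so it reverts to its default,
# while "vcpus" is simply overwritten.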
729 def _CheckOutputFields(static, dynamic, selected):
730 """Checks whether all selected fields are valid.
732 @type static: L{utils.FieldSet}
733 @param static: static fields set
734 @type dynamic: L{utils.FieldSet}
735 @param dynamic: dynamic fields set
742 delta = f.NonMatching(selected)
744 raise errors.OpPrereqError("Unknown output fields selected: %s"
745 % ",".join(delta), errors.ECODE_INVAL)
748 def _CheckGlobalHvParams(params):
749 """Validates that given hypervisor params are not global ones.
751 This will ensure that instances don't get customised versions of global parameters.
755 used_globals = constants.HVC_GLOBALS.intersection(params)
757 msg = ("The following hypervisor parameters are global and cannot"
758 " be customized at instance level, please modify them at"
759 " cluster level: %s" % utils.CommaJoin(used_globals))
760 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
763 def _CheckNodeOnline(lu, node):
764 """Ensure that a given node is online.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @raise errors.OpPrereqError: if the node is offline
771 if lu.cfg.GetNodeInfo(node).offline:
772 raise errors.OpPrereqError("Can't use offline node %s" % node,
776 def _CheckNodeNotDrained(lu, node):
777 """Ensure that a given node is not drained.
779 @param lu: the LU on behalf of which we make the check
780 @param node: the node to check
781 @raise errors.OpPrereqError: if the node is drained
784 if lu.cfg.GetNodeInfo(node).drained:
785 raise errors.OpPrereqError("Can't use drained node %s" % node,
789 def _CheckNodeHasOS(lu, node, os_name, force_variant):
790 """Ensure that a node supports a given OS.
792 @param lu: the LU on behalf of which we make the check
793 @param node: the node to check
794 @param os_name: the OS to query about
795 @param force_variant: whether to ignore variant errors
796 @raise errors.OpPrereqError: if the node does not support the OS
799 result = lu.rpc.call_os_get(node, os_name)
800 result.Raise("OS '%s' not in supported OS list for node %s" %
802 prereq=True, ecode=errors.ECODE_INVAL)
803 if not force_variant:
804 _CheckOSVariant(result.payload, os_name)
807 def _RequireFileStorage():
808 """Checks that file storage is enabled.
810 @raise errors.OpPrereqError: when file storage is disabled
813 if not constants.ENABLE_FILE_STORAGE:
814 raise errors.OpPrereqError("File storage disabled at configure time",
818 def _CheckDiskTemplate(template):
819 """Ensure a given disk template is valid.
822 if template not in constants.DISK_TEMPLATES:
823 msg = ("Invalid disk template name '%s', valid templates are: %s" %
824 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
826 if template == constants.DT_FILE:
827 _RequireFileStorage()
831 def _CheckStorageType(storage_type):
832 """Ensure a given storage type is valid.
835 if storage_type not in constants.VALID_STORAGE_TYPES:
836 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
838 if storage_type == constants.ST_FILE:
839 _RequireFileStorage()
843 def _GetClusterDomainSecret():
844 """Reads the cluster domain secret.
847 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
851 def _CheckInstanceDown(lu, instance, reason):
852 """Ensure that an instance is not running."""
853 if instance.admin_up:
854 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
855 (instance.name, reason), errors.ECODE_STATE)
857 pnode = instance.primary_node
858 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
859 ins_l.Raise("Can't contact node %s for instance information" % pnode,
860 prereq=True, ecode=errors.ECODE_ENVIRON)
862 if instance.name in ins_l.payload:
863 raise errors.OpPrereqError("Instance %s is running, %s" %
864 (instance.name, reason), errors.ECODE_STATE)
867 def _ExpandItemName(fn, name, kind):
868 """Expand an item name.
870 @param fn: the function to use for expansion
871 @param name: requested item name
872 @param kind: text description ('Node' or 'Instance')
873 @return: the resolved (full) name
874 @raise errors.OpPrereqError: if the item is not found
878 if full_name is None:
879 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
884 def _ExpandNodeName(cfg, name):
885 """Wrapper over L{_ExpandItemName} for nodes."""
886 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
889 def _ExpandInstanceName(cfg, name):
890 """Wrapper over L{_ExpandItemName} for instance."""
891 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
894 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
895 memory, vcpus, nics, disk_template, disks,
896 bep, hvp, hypervisor_name):
897 """Builds instance related env variables for hooks
899 This builds the hook environment from individual variables.
902 @param name: the name of the instance
903 @type primary_node: string
904 @param primary_node: the name of the instance's primary node
905 @type secondary_nodes: list
906 @param secondary_nodes: list of secondary nodes as strings
907 @type os_type: string
908 @param os_type: the name of the instance's OS
909 @type status: boolean
910 @param status: the should_run status of the instance
912 @param memory: the memory size of the instance
914 @param vcpus: the count of VCPUs the instance has
916 @param nics: list of tuples (ip, mac, mode, link) representing
917 the NICs the instance has
918 @type disk_template: string
919 @param disk_template: the disk template of the instance
921 @param disks: the list of (size, mode) pairs
923 @param bep: the backend parameters for the instance
925 @param hvp: the hypervisor parameters for the instance
926 @type hypervisor_name: string
927 @param hypervisor_name: the hypervisor for the instance
929 @return: the hook environment for this instance
938 "INSTANCE_NAME": name,
939 "INSTANCE_PRIMARY": primary_node,
940 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
941 "INSTANCE_OS_TYPE": os_type,
942 "INSTANCE_STATUS": str_status,
943 "INSTANCE_MEMORY": memory,
944 "INSTANCE_VCPUS": vcpus,
945 "INSTANCE_DISK_TEMPLATE": disk_template,
946 "INSTANCE_HYPERVISOR": hypervisor_name,
950 nic_count = len(nics)
951 for idx, (ip, mac, mode, link) in enumerate(nics):
954 env["INSTANCE_NIC%d_IP" % idx] = ip
955 env["INSTANCE_NIC%d_MAC" % idx] = mac
956 env["INSTANCE_NIC%d_MODE" % idx] = mode
957 env["INSTANCE_NIC%d_LINK" % idx] = link
958 if mode == constants.NIC_MODE_BRIDGED:
959 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
963 env["INSTANCE_NIC_COUNT"] = nic_count
966 disk_count = len(disks)
967 for idx, (size, mode) in enumerate(disks):
968 env["INSTANCE_DISK%d_SIZE" % idx] = size
969 env["INSTANCE_DISK%d_MODE" % idx] = mode
973 env["INSTANCE_DISK_COUNT"] = disk_count
975 for source, kind in [(bep, "BE"), (hvp, "HV")]:
976 for key, value in source.items():
977 env["INSTANCE_%s_%s" % (kind, key)] = value
982 def _NICListToTuple(lu, nics):
983 """Build a list of nic information tuples.
985 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
986 value in LUQueryInstanceData.
988 @type lu: L{LogicalUnit}
989 @param lu: the logical unit on whose behalf we execute
990 @type nics: list of L{objects.NIC}
991 @param nics: list of nics to convert to hooks tuples
995 cluster = lu.cfg.GetClusterInfo()
999 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1000 mode = filled_params[constants.NIC_MODE]
1001 link = filled_params[constants.NIC_LINK]
1002 hooks_nics.append((ip, mac, mode, link))
1006 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1007 """Builds instance related env variables for hooks from an object.
1009 @type lu: L{LogicalUnit}
1010 @param lu: the logical unit on whose behalf we execute
1011 @type instance: L{objects.Instance}
1012 @param instance: the instance for which we should build the
1014 @type override: dict
1015 @param override: dictionary with key/values that will override
1018 @return: the hook environment dictionary
1021 cluster = lu.cfg.GetClusterInfo()
1022 bep = cluster.FillBE(instance)
1023 hvp = cluster.FillHV(instance)
1025 'name': instance.name,
1026 'primary_node': instance.primary_node,
1027 'secondary_nodes': instance.secondary_nodes,
1028 'os_type': instance.os,
1029 'status': instance.admin_up,
1030 'memory': bep[constants.BE_MEMORY],
1031 'vcpus': bep[constants.BE_VCPUS],
1032 'nics': _NICListToTuple(lu, instance.nics),
1033 'disk_template': instance.disk_template,
1034 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1037 'hypervisor_name': instance.hypervisor,
1040 args.update(override)
1041 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
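# Illustrative usage (the override value is hypothetical): callers can
# replace individual arguments fed to _BuildInstanceHookEnv, e.g.
#   env = _BuildInstanceHookEnvByObject(self, instance,
#                                       override={"status": False})
# builds the environment from the instance's configuration but reports the
# instance as not running.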
1044 def _AdjustCandidatePool(lu, exceptions):
1045 """Adjust the candidate pool after node operations.
1048 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1050 lu.LogInfo("Promoted nodes to master candidate role: %s",
1051 utils.CommaJoin(node.name for node in mod_list))
1052 for name in mod_list:
1053 lu.context.ReaddNode(name)
1054 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1056 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1060 def _DecideSelfPromotion(lu, exceptions=None):
1061 """Decide whether I should promote myself as a master candidate.
1064 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1065 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1066 # the new node will increase mc_max with one, so:
1067 mc_should = min(mc_should + 1, cp_size)
1068 return mc_now < mc_should
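# Illustrative example (hypothetical numbers): with candidate_pool_size=10,
# 3 current master candidates and 3 candidates that "should" exist, adding
# this node bumps mc_should to min(3 + 1, 10) = 4; since 3 < 4 the node
# decides to promote itself to master candidate.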
1071 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1072 """Check that the brigdes needed by a list of nics exist.
1075 cluster = lu.cfg.GetClusterInfo()
1076 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1077 brlist = [params[constants.NIC_LINK] for params in paramslist
1078 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1080 result = lu.rpc.call_bridges_exist(target_node, brlist)
1081 result.Raise("Error checking bridges on destination node '%s'" %
1082 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1085 def _CheckInstanceBridgesExist(lu, instance, node=None):
1086 """Check that the brigdes needed by an instance exist.
1090 node = instance.primary_node
1091 _CheckNicsBridgesExist(lu, instance.nics, node)
1094 def _CheckOSVariant(os_obj, name):
1095 """Check whether an OS name conforms to the os variants specification.
1097 @type os_obj: L{objects.OS}
1098 @param os_obj: OS object to check
1100 @param name: OS name passed by the user, to check for validity
1103 if not os_obj.supported_variants:
1105 variant = objects.OS.GetVariant(name)
1107 raise errors.OpPrereqError("OS name must include a variant",
1110 if variant not in os_obj.supported_variants:
1111 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1114 def _GetNodeInstancesInner(cfg, fn):
1115 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1118 def _GetNodeInstances(cfg, node_name):
1119 """Returns a list of all primary and secondary instances on a node.
1123 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1126 def _GetNodePrimaryInstances(cfg, node_name):
1127 """Returns primary instances on a node.
1130 return _GetNodeInstancesInner(cfg,
1131 lambda inst: node_name == inst.primary_node)
1134 def _GetNodeSecondaryInstances(cfg, node_name):
1135 """Returns secondary instances on a node.
1138 return _GetNodeInstancesInner(cfg,
1139 lambda inst: node_name in inst.secondary_nodes)
1142 def _GetStorageTypeArgs(cfg, storage_type):
1143 """Returns the arguments for a storage type.
1146 # Special case for file storage
1147 if storage_type == constants.ST_FILE:
1148 # storage.FileStorage wants a list of storage directories
1149 return [[cfg.GetFileStorageDir()]]
1154 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1157 for dev in instance.disks:
1158 cfg.SetDiskID(dev, node_name)
1160 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1161 result.Raise("Failed to get disk status from node %s" % node_name,
1162 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164 for idx, bdev_status in enumerate(result.payload):
1165 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1171 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1172 """Check the sanity of iallocator and node arguments and use the
1173 cluster-wide iallocator if appropriate.
1175 Check that at most one of (iallocator, node) is specified. If none is
1176 specified, then the LU's opcode's iallocator slot is filled with the
1177 cluster-wide default iallocator.
1179 @type iallocator_slot: string
1180 @param iallocator_slot: the name of the opcode iallocator slot
1181 @type node_slot: string
1182 @param node_slot: the name of the opcode target node slot
1185 node = getattr(lu.op, node_slot, None)
1186 iallocator = getattr(lu.op, iallocator_slot, None)
1188 if node is not None and iallocator is not None:
1189 raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
1191 elif node is None and iallocator is None:
1192 default_iallocator = lu.cfg.GetDefaultIAllocator()
1193 if default_iallocator:
1194 setattr(lu.op, iallocator_slot, default_iallocator)
1196 raise errors.OpPrereqError("No iallocator or node given and no"
1197 " cluster-wide default iallocator found."
1198 " Please specify either an iallocator or a"
1199 " node, or set a cluster-wide default"
1203 class LUPostInitCluster(LogicalUnit):
1204 """Logical unit for running hooks after cluster initialization.
1207 HPATH = "cluster-init"
1208 HTYPE = constants.HTYPE_CLUSTER
1210 def BuildHooksEnv(self):
1214 env = {"OP_TARGET": self.cfg.GetClusterName()}
1215 mn = self.cfg.GetMasterNode()
1216 return env, [], [mn]
1218 def Exec(self, feedback_fn):
1225 class LUDestroyCluster(LogicalUnit):
1226 """Logical unit for destroying the cluster.
1229 HPATH = "cluster-destroy"
1230 HTYPE = constants.HTYPE_CLUSTER
1232 def BuildHooksEnv(self):
1236 env = {"OP_TARGET": self.cfg.GetClusterName()}
1239 def CheckPrereq(self):
1240 """Check prerequisites.
1242 This checks whether the cluster is empty.
1244 Any errors are signaled by raising errors.OpPrereqError.
1247 master = self.cfg.GetMasterNode()
1249 nodelist = self.cfg.GetNodeList()
1250 if len(nodelist) != 1 or nodelist[0] != master:
1251 raise errors.OpPrereqError("There are still %d node(s) in"
1252 " this cluster." % (len(nodelist) - 1),
1254 instancelist = self.cfg.GetInstanceList()
1256 raise errors.OpPrereqError("There are still %d instance(s) in"
1257 " this cluster." % len(instancelist),
1260 def Exec(self, feedback_fn):
1261 """Destroys the cluster.
1264 master = self.cfg.GetMasterNode()
1265 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1267 # Run post hooks on master node before it's removed
1268 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1270 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1272 # pylint: disable-msg=W0702
1273 self.LogWarning("Errors occurred running hooks on %s" % master)
1275 result = self.rpc.call_node_stop_master(master, False)
1276 result.Raise("Could not disable the master role")
1278 if modify_ssh_setup:
1279 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1280 utils.CreateBackup(priv_key)
1281 utils.CreateBackup(pub_key)
1286 def _VerifyCertificate(filename):
1287 """Verifies a certificate for LUVerifyCluster.
1289 @type filename: string
1290 @param filename: Path to PEM file
1294 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1295 utils.ReadFile(filename))
1296 except Exception, err: # pylint: disable-msg=W0703
1297 return (LUVerifyCluster.ETYPE_ERROR,
1298 "Failed to load X509 certificate %s: %s" % (filename, err))
1301 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1302 constants.SSL_CERT_EXPIRATION_ERROR)
1305 fnamemsg = "While verifying %s: %s" % (filename, msg)
1310 return (None, fnamemsg)
1311 elif errcode == utils.CERT_WARNING:
1312 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1313 elif errcode == utils.CERT_ERROR:
1314 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1316 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1319 class LUVerifyCluster(LogicalUnit):
1320 """Verifies the cluster status.
1323 HPATH = "cluster-verify"
1324 HTYPE = constants.HTYPE_CLUSTER
1326 ("skip_checks", _EmptyList,
1327 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1328 ("verbose", False, _TBool),
1329 ("error_codes", False, _TBool),
1330 ("debug_simulate_errors", False, _TBool),
1334 TCLUSTER = "cluster"
1336 TINSTANCE = "instance"
1338 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1339 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1340 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1341 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1342 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1343 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1345 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1346 ENODEDRBD = (TNODE, "ENODEDRBD")
1347 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1348 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1349 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1350 ENODEHV = (TNODE, "ENODEHV")
1351 ENODELVM = (TNODE, "ENODELVM")
1352 ENODEN1 = (TNODE, "ENODEN1")
1353 ENODENET = (TNODE, "ENODENET")
1354 ENODEOS = (TNODE, "ENODEOS")
1355 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1356 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1357 ENODERPC = (TNODE, "ENODERPC")
1358 ENODESSH = (TNODE, "ENODESSH")
1359 ENODEVERSION = (TNODE, "ENODEVERSION")
1360 ENODESETUP = (TNODE, "ENODESETUP")
1361 ENODETIME = (TNODE, "ENODETIME")
1363 ETYPE_FIELD = "code"
1364 ETYPE_ERROR = "ERROR"
1365 ETYPE_WARNING = "WARNING"
1367 class NodeImage(object):
1368 """A class representing the logical and physical status of a node.
1371 @ivar name: the node name to which this object refers
1372 @ivar volumes: a structure as returned from
1373 L{ganeti.backend.GetVolumeList} (runtime)
1374 @ivar instances: a list of running instances (runtime)
1375 @ivar pinst: list of configured primary instances (config)
1376 @ivar sinst: list of configured secondary instances (config)
1377 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1378 of this node (config)
1379 @ivar mfree: free memory, as reported by hypervisor (runtime)
1380 @ivar dfree: free disk, as reported by the node (runtime)
1381 @ivar offline: the offline status (config)
1382 @type rpc_fail: boolean
1383 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1384 not whether the individual keys were correct) (runtime)
1385 @type lvm_fail: boolean
1386 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1387 @type hyp_fail: boolean
1388 @ivar hyp_fail: whether the RPC call didn't return the instance list
1389 @type ghost: boolean
1390 @ivar ghost: whether this is a known node or not (config)
1391 @type os_fail: boolean
1392 @ivar os_fail: whether the RPC call didn't return valid OS data
1394 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1397 def __init__(self, offline=False, name=None):
1406 self.offline = offline
1407 self.rpc_fail = False
1408 self.lvm_fail = False
1409 self.hyp_fail = False
1411 self.os_fail = False
1414 def ExpandNames(self):
1415 self.needed_locks = {
1416 locking.LEVEL_NODE: locking.ALL_SET,
1417 locking.LEVEL_INSTANCE: locking.ALL_SET,
1419 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1421 def _Error(self, ecode, item, msg, *args, **kwargs):
1422 """Format an error message.
1424 Based on the opcode's error_codes parameter, either format a
1425 parseable error code, or a simpler error string.
1427 This must be called only from Exec and functions called from Exec.
1430 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1432 # first complete the msg
1435 # then format the whole message
1436 if self.op.error_codes:
1437 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1443 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1444 # and finally report it via the feedback_fn
1445 self._feedback_fn(" - %s" % msg)
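# Illustrative output (hypothetical node and message): with op.error_codes
# enabled a problem is reported in the machine-parseable form
#   - ERROR:ENODEVERSION:node:node2.example.com:incompatible protocol versions
# while without it the simpler form
#   - ERROR: node node2.example.com: incompatible protocol versions
# is used.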
1447 def _ErrorIf(self, cond, *args, **kwargs):
1448 """Log an error message if the passed condition is True.
1451 cond = bool(cond) or self.op.debug_simulate_errors
1453 self._Error(*args, **kwargs)
1454 # do not mark the operation as failed for WARN cases only
1455 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1456 self.bad = self.bad or cond
1458 def _VerifyNode(self, ninfo, nresult):
1459 """Perform some basic validation on data returned from a node.
1461 - check the result data structure is well formed and has all the
1463 - check ganeti version
1465 @type ninfo: L{objects.Node}
1466 @param ninfo: the node to check
1467 @param nresult: the results from the node
1469 @return: whether overall this call was successful (and we can expect
1470 reasonable values in the response)
1474 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1476 # main result, nresult should be a non-empty dict
1477 test = not nresult or not isinstance(nresult, dict)
1478 _ErrorIf(test, self.ENODERPC, node,
1479 "unable to verify node: no data returned")
1483 # compares ganeti version
1484 local_version = constants.PROTOCOL_VERSION
1485 remote_version = nresult.get("version", None)
1486 test = not (remote_version and
1487 isinstance(remote_version, (list, tuple)) and
1488 len(remote_version) == 2)
1489 _ErrorIf(test, self.ENODERPC, node,
1490 "connection to node returned invalid data")
1494 test = local_version != remote_version[0]
1495 _ErrorIf(test, self.ENODEVERSION, node,
1496 "incompatible protocol versions: master %s,"
1497 " node %s", local_version, remote_version[0])
1501 # node seems compatible, we can actually try to look into its results
1503 # full package version
1504 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1505 self.ENODEVERSION, node,
1506 "software version mismatch: master %s, node %s",
1507 constants.RELEASE_VERSION, remote_version[1],
1508 code=self.ETYPE_WARNING)
1510 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1511 if isinstance(hyp_result, dict):
1512 for hv_name, hv_result in hyp_result.iteritems():
1513 test = hv_result is not None
1514 _ErrorIf(test, self.ENODEHV, node,
1515 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1518 test = nresult.get(constants.NV_NODESETUP,
1519 ["Missing NODESETUP results"])
1520 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1525 def _VerifyNodeTime(self, ninfo, nresult,
1526 nvinfo_starttime, nvinfo_endtime):
1527 """Check the node time.
1529 @type ninfo: L{objects.Node}
1530 @param ninfo: the node to check
1531 @param nresult: the remote results for the node
1532 @param nvinfo_starttime: the start time of the RPC call
1533 @param nvinfo_endtime: the end time of the RPC call
1537 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1539 ntime = nresult.get(constants.NV_TIME, None)
1541 ntime_merged = utils.MergeTime(ntime)
1542 except (ValueError, TypeError):
1543 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1546 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1547 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1548 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1549 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1553 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1554 "Node time diverges by at least %s from master node time",
1557 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1558 """Check the node time.
1560 @type ninfo: L{objects.Node}
1561 @param ninfo: the node to check
1562 @param nresult: the remote results for the node
1563 @param vg_name: the configured VG name
1570 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1572 # checks vg existence and size > 20G
1573 vglist = nresult.get(constants.NV_VGLIST, None)
1575 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1577 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1578 constants.MIN_VG_SIZE)
1579 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1582 pvlist = nresult.get(constants.NV_PVLIST, None)
1583 test = pvlist is None
1584 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1586 # check that ':' is not present in PV names, since it's a
1587 # special character for lvcreate (denotes the range of PEs to
1589 for _, pvname, owner_vg in pvlist:
1590 test = ":" in pvname
1591 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1592 " '%s' of VG '%s'", pvname, owner_vg)
1594 def _VerifyNodeNetwork(self, ninfo, nresult):
1595 """Check the node time.
1597 @type ninfo: L{objects.Node}
1598 @param ninfo: the node to check
1599 @param nresult: the remote results for the node
1603 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1605 test = constants.NV_NODELIST not in nresult
1606 _ErrorIf(test, self.ENODESSH, node,
1607 "node hasn't returned node ssh connectivity data")
1609 if nresult[constants.NV_NODELIST]:
1610 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1611 _ErrorIf(True, self.ENODESSH, node,
1612 "ssh communication with node '%s': %s", a_node, a_msg)
1614 test = constants.NV_NODENETTEST not in nresult
1615 _ErrorIf(test, self.ENODENET, node,
1616 "node hasn't returned node tcp connectivity data")
1618 if nresult[constants.NV_NODENETTEST]:
1619 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1621 _ErrorIf(True, self.ENODENET, node,
1622 "tcp communication with node '%s': %s",
1623 anode, nresult[constants.NV_NODENETTEST][anode])
1625 test = constants.NV_MASTERIP not in nresult
1626 _ErrorIf(test, self.ENODENET, node,
1627 "node hasn't returned node master IP reachability data")
1629 if not nresult[constants.NV_MASTERIP]:
1630 if node == self.master_node:
1631 msg = "the master node cannot reach the master IP (not configured?)"
1633 msg = "cannot reach the master IP"
1634 _ErrorIf(True, self.ENODENET, node, msg)
1637 def _VerifyInstance(self, instance, instanceconfig, node_image):
1638 """Verify an instance.
1640 This function checks to see if the required block devices are
1641 available on the instance's node.
1644 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1645 node_current = instanceconfig.primary_node
1647 node_vol_should = {}
1648 instanceconfig.MapLVsByNode(node_vol_should)
1650 for node in node_vol_should:
1651 n_img = node_image[node]
1652 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1653 # ignore missing volumes on offline or broken nodes
1655 for volume in node_vol_should[node]:
1656 test = volume not in n_img.volumes
1657 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1658 "volume %s missing on node %s", volume, node)
1660 if instanceconfig.admin_up:
1661 pri_img = node_image[node_current]
1662 test = instance not in pri_img.instances and not pri_img.offline
1663 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1664 "instance not running on its primary node %s",
1667 for node, n_img in node_image.items():
1668 if node != node_current:
1669 test = instance in n_img.instances
1670 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1671 "instance should not run on node %s", node)
1673 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1674 """Verify if there are any unknown volumes in the cluster.
1676 The .os, .swap and backup volumes are ignored. All other volumes are
1677 reported as unknown.
1679 @type reserved: L{ganeti.utils.FieldSet}
1680 @param reserved: a FieldSet of reserved volume names
1683 for node, n_img in node_image.items():
1684 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1685 # skip non-healthy nodes
1687 for volume in n_img.volumes:
1688 test = ((node not in node_vol_should or
1689 volume not in node_vol_should[node]) and
1690 not reserved.Matches(volume))
1691 self._ErrorIf(test, self.ENODEORPHANLV, node,
1692 "volume %s is unknown", volume)
1694 def _VerifyOrphanInstances(self, instancelist, node_image):
1695 """Verify the list of running instances.
1697 This checks what instances are running but unknown to the cluster.
1700 for node, n_img in node_image.items():
1701 for o_inst in n_img.instances:
1702 test = o_inst not in instancelist
1703 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1704 "instance %s on node %s should not exist", o_inst, node)
1706 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1707 """Verify N+1 Memory Resilience.
1709 Check that if one single node dies we can still start all the
1710 instances it was primary for.
1713 for node, n_img in node_image.items():
1714 # This code checks that every node which is now listed as
1715 # secondary has enough memory to host all instances it is
1716 # supposed to should a single other node in the cluster fail.
1717 # FIXME: not ready for failover to an arbitrary node
1718 # FIXME: does not support file-backed instances
1719 # WARNING: we currently take into account down instances as well
1720 # as up ones, considering that even if they're down someone
1721 # might want to start them even in the event of a node failure.
1722 for prinode, instances in n_img.sbp.items():
1724 for instance in instances:
1725 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1726 if bep[constants.BE_AUTO_BALANCE]:
1727 needed_mem += bep[constants.BE_MEMORY]
1728 test = n_img.mfree < needed_mem
1729 self._ErrorIf(test, self.ENODEN1, node,
1730 "not enough memory on to accommodate"
1731 " failovers should peer node %s fail", prinode)
1733 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1735 """Verifies and computes the node required file checksums.
1737 @type ninfo: L{objects.Node}
1738 @param ninfo: the node to check
1739 @param nresult: the remote results for the node
1740 @param file_list: required list of files
1741 @param local_cksum: dictionary of local files and their checksums
1742 @param master_files: list of files that only masters should have
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1749 test = not isinstance(remote_cksum, dict)
1750 _ErrorIf(test, self.ENODEFILECHECK, node,
1751 "node hasn't returned file checksum data")
1755 for file_name in file_list:
1756 node_is_mc = ninfo.master_candidate
1757 must_have = (file_name not in master_files) or node_is_mc
1759 test1 = file_name not in remote_cksum
1761 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1763 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1764 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1765 "file '%s' missing", file_name)
1766 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1767 "file '%s' has wrong checksum", file_name)
1768 # not candidate and this is not a must-have file
1769 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1770 "file '%s' should not exist on non master"
1771 " candidates (and the file is outdated)", file_name)
1772 # all good, except non-master/non-must have combination
1773 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1774 "file '%s' should not exist"
1775 " on non master candidates", file_name)
1777 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1779 """Verifies and the node DRBD status.
1781 @type ninfo: L{objects.Node}
1782 @param ninfo: the node to check
1783 @param nresult: the remote results for the node
1784 @param instanceinfo: the dict of instances
1785 @param drbd_helper: the configured DRBD usermode helper
1786 @param drbd_map: the DRBD map as returned by
1787 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1791 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1794 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1795 test = (helper_result is None)
1796 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1797 "no drbd usermode helper returned")
1799 status, payload = helper_result
1801 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1802 "drbd usermode helper check unsuccessful: %s", payload)
1803 test = status and (payload != drbd_helper)
1804 _ErrorIf(test, self.ENODEDRBDHELPER, node,
1805 "wrong drbd usermode helper: %s", payload)
1807 # compute the DRBD minors
1809 for minor, instance in drbd_map[node].items():
1810 test = instance not in instanceinfo
1811 _ErrorIf(test, self.ECLUSTERCFG, None,
1812 "ghost instance '%s' in temporary DRBD map", instance)
1813 # ghost instance should not be running, but otherwise we
1814 # don't give double warnings (both ghost instance and
1815 # unallocated minor in use)
1817 node_drbd[minor] = (instance, False)
1819 instance = instanceinfo[instance]
1820 node_drbd[minor] = (instance.name, instance.admin_up)
1822 # and now check them
1823 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1824 test = not isinstance(used_minors, (tuple, list))
1825 _ErrorIf(test, self.ENODEDRBD, node,
1826 "cannot parse drbd status file: %s", str(used_minors))
1828 # we cannot check drbd status
1831 for minor, (iname, must_exist) in node_drbd.items():
1832 test = minor not in used_minors and must_exist
1833 _ErrorIf(test, self.ENODEDRBD, node,
1834 "drbd minor %d of instance %s is not active", minor, iname)
1835 for minor in used_minors:
1836 test = minor not in node_drbd
1837 _ErrorIf(test, self.ENODEDRBD, node,
1838 "unallocated drbd minor %d is in use", minor)
1840 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1841 """Builds the node OS structures.
1843 @type ninfo: L{objects.Node}
1844 @param ninfo: the node to check
1845 @param nresult: the remote results for the node
1846 @param nimg: the node image object
1850 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1852 remote_os = nresult.get(constants.NV_OSLIST, None)
1853 test = (not isinstance(remote_os, list) or
1854 not compat.all(isinstance(v, list) and len(v) == 7
1855 for v in remote_os))
1857 _ErrorIf(test, self.ENODEOS, node,
1858 "node hasn't returned valid OS data")
1867 for (name, os_path, status, diagnose,
1868 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1870 if name not in os_dict:
1873 # parameters is a list of lists instead of list of tuples due to
1874 # JSON lacking a real tuple type, fix it:
1875 parameters = [tuple(v) for v in parameters]
1876 os_dict[name].append((os_path, status, diagnose,
1877 set(variants), set(parameters), set(api_ver)))
1879 nimg.oslist = os_dict
1881 def _VerifyNodeOS(self, ninfo, nimg, base):
1882 """Verifies the node OS list.
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nimg: the node image object
1887 @param base: the 'template' node we match against (e.g. from the master)
1891 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1893 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1895 for os_name, os_data in nimg.oslist.items():
1896 assert os_data, "Empty OS status for OS %s?!" % os_name
1897 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1898 _ErrorIf(not f_status, self.ENODEOS, node,
1899 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1900 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1901 "OS '%s' has multiple entries (first one shadows the rest): %s",
1902 os_name, utils.CommaJoin([v[0] for v in os_data]))
1903 # this will be caught in the backend too
1904 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1905 and not f_var, self.ENODEOS, node,
1906 "OS %s with API at least %d does not declare any variant",
1907 os_name, constants.OS_API_V15)
1908 # comparisons with the 'base' image
1909 test = os_name not in base.oslist
1910 _ErrorIf(test, self.ENODEOS, node,
1911 "Extra OS %s not present on reference node (%s)",
1915 assert base.oslist[os_name], "Base node has empty OS status?"
1916 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1918 # base OS is invalid, skipping
1920 for kind, a, b in [("API version", f_api, b_api),
1921 ("variants list", f_var, b_var),
1922 ("parameters", f_param, b_param)]:
1923 _ErrorIf(a != b, self.ENODEOS, node,
1924 "OS %s %s differs from reference node %s: %s vs. %s",
1925 kind, os_name, base.name,
1926 utils.CommaJoin(a), utils.CommaJoin(b))
1928 # check any missing OSes
1929 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1930 _ErrorIf(missing, self.ENODEOS, node,
1931 "OSes present on reference node %s but missing on this node: %s",
1932 base.name, utils.CommaJoin(missing))
1934 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1935 """Verifies and updates the node volume data.
1937 This function will update a L{NodeImage}'s internal structures
1938 with data from the remote call.
1940 @type ninfo: L{objects.Node}
1941 @param ninfo: the node to check
1942 @param nresult: the remote results for the node
1943 @param nimg: the node image object
1944 @param vg_name: the configured VG name
1948 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1950 nimg.lvm_fail = True
1951 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1954 elif isinstance(lvdata, basestring):
1955 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1956 utils.SafeEncode(lvdata))
1957 elif not isinstance(lvdata, dict):
1958 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1960 nimg.volumes = lvdata
1961 nimg.lvm_fail = False
1963 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1964 """Verifies and updates the node instance list.
1966 If the listing was successful, then updates this node's instance
1967 list. Otherwise, it marks the RPC call as failed for the instance list.
1970 @type ninfo: L{objects.Node}
1971 @param ninfo: the node to check
1972 @param nresult: the remote results for the node
1973 @param nimg: the node image object
1976 idata = nresult.get(constants.NV_INSTANCELIST, None)
1977 test = not isinstance(idata, list)
1978 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1979 " (instancelist): %s", utils.SafeEncode(str(idata)))
1981 nimg.hyp_fail = True
1983 nimg.instances = idata
1985 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1986 """Verifies and computes a node information map
1988 @type ninfo: L{objects.Node}
1989 @param ninfo: the node to check
1990 @param nresult: the remote results for the node
1991 @param nimg: the node image object
1992 @param vg_name: the configured VG name
1996 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1998 # try to read free memory (from the hypervisor)
1999 hv_info = nresult.get(constants.NV_HVINFO, None)
2000 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2001 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2004 nimg.mfree = int(hv_info["memory_free"])
2005 except (ValueError, TypeError):
2006 _ErrorIf(True, self.ENODERPC, node,
2007 "node returned invalid nodeinfo, check hypervisor")
2009 # FIXME: devise a free space model for file-based instances as well
2010 if vg_name is not None:
2011 test = (constants.NV_VGLIST not in nresult or
2012 vg_name not in nresult[constants.NV_VGLIST])
2013 _ErrorIf(test, self.ENODELVM, node,
2014 "node didn't return data for the volume group '%s'"
2015 " - it is either missing or broken", vg_name)
2018 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2019 except (ValueError, TypeError):
2020 _ErrorIf(True, self.ENODERPC, node,
2021 "node returned invalid LVM info, check LVM status")
2023 def BuildHooksEnv(self):
2026 Cluster-Verify hooks are run only in the post phase; any hook failure is
2027 logged in the verify output and makes the verification fail.
2030 all_nodes = self.cfg.GetNodeList()
2032 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2034 for node in self.cfg.GetAllNodesInfo().values():
2035 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2037 return env, [], all_nodes
2039 def Exec(self, feedback_fn):
2040 """Verify integrity of cluster, performing various test on nodes.
2044 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2045 verbose = self.op.verbose
2046 self._feedback_fn = feedback_fn
2047 feedback_fn("* Verifying global settings")
2048 for msg in self.cfg.VerifyConfig():
2049 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2051 # Check the cluster certificates
2052 for cert_filename in constants.ALL_CERT_FILES:
2053 (errcode, msg) = _VerifyCertificate(cert_filename)
2054 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2056 vg_name = self.cfg.GetVGName()
2057 drbd_helper = self.cfg.GetDRBDHelper()
2058 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2059 cluster = self.cfg.GetClusterInfo()
2060 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2061 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2062 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2063 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2064 for iname in instancelist)
2065 i_non_redundant = [] # Non redundant instances
2066 i_non_a_balanced = [] # Non auto-balanced instances
2067 n_offline = 0 # Count of offline nodes
2068 n_drained = 0 # Count of nodes being drained
2069 node_vol_should = {}
2071 # FIXME: verify OS list
2072 # do local checksums
2073 master_files = [constants.CLUSTER_CONF_FILE]
2074 master_node = self.master_node = self.cfg.GetMasterNode()
2075 master_ip = self.cfg.GetMasterIP()
2077 file_names = ssconf.SimpleStore().GetFileList()
2078 file_names.extend(constants.ALL_CERT_FILES)
2079 file_names.extend(master_files)
2080 if cluster.modify_etc_hosts:
2081 file_names.append(constants.ETC_HOSTS)
2083 local_checksums = utils.FingerprintFiles(file_names)
2085 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2086 node_verify_param = {
2087 constants.NV_FILELIST: file_names,
2088 constants.NV_NODELIST: [node.name for node in nodeinfo
2089 if not node.offline],
2090 constants.NV_HYPERVISOR: hypervisors,
2091 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2092 node.secondary_ip) for node in nodeinfo
2093 if not node.offline],
2094 constants.NV_INSTANCELIST: hypervisors,
2095 constants.NV_VERSION: None,
2096 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2097 constants.NV_NODESETUP: None,
2098 constants.NV_TIME: None,
2099 constants.NV_MASTERIP: (master_node, master_ip),
2100 constants.NV_OSLIST: None,
2103 if vg_name is not None:
2104 node_verify_param[constants.NV_VGLIST] = None
2105 node_verify_param[constants.NV_LVLIST] = vg_name
2106 node_verify_param[constants.NV_PVLIST] = [vg_name]
2107 node_verify_param[constants.NV_DRBDLIST] = None
2110 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2112 # Build our expected cluster state
2113 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2115 for node in nodeinfo)
2117 for instance in instancelist:
2118 inst_config = instanceinfo[instance]
2120 for nname in inst_config.all_nodes:
2121 if nname not in node_image:
2123 gnode = self.NodeImage(name=nname)
2125 node_image[nname] = gnode
2127 inst_config.MapLVsByNode(node_vol_should)
2129 pnode = inst_config.primary_node
2130 node_image[pnode].pinst.append(instance)
2132 for snode in inst_config.secondary_nodes:
2133 nimg = node_image[snode]
2134 nimg.sinst.append(instance)
2135 if pnode not in nimg.sbp:
2136 nimg.sbp[pnode] = []
2137 nimg.sbp[pnode].append(instance)
2139 # At this point, we have the in-memory data structures complete,
2140 # except for the runtime information, which we'll gather next
2142 # Due to the way our RPC system works, exact response times cannot be
2143 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2144 # time before and after executing the request, we can at least have a time window.
2146 nvinfo_starttime = time.time()
2147 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2148 self.cfg.GetClusterName())
2149 nvinfo_endtime = time.time()
2151 all_drbd_map = self.cfg.ComputeDRBDMap()
2153 feedback_fn("* Verifying node status")
2157 for node_i in nodeinfo:
2159 nimg = node_image[node]
2163 feedback_fn("* Skipping offline node %s" % (node,))
2167 if node == master_node:
2169 elif node_i.master_candidate:
2170 ntype = "master candidate"
2171 elif node_i.drained:
2177 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2179 msg = all_nvinfo[node].fail_msg
2180 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2182 nimg.rpc_fail = True
2185 nresult = all_nvinfo[node].payload
2187 nimg.call_ok = self._VerifyNode(node_i, nresult)
2188 self._VerifyNodeNetwork(node_i, nresult)
2189 self._VerifyNodeLVM(node_i, nresult, vg_name)
2190 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2192 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2196 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2197 self._UpdateNodeInstances(node_i, nresult, nimg)
2198 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2199 self._UpdateNodeOS(node_i, nresult, nimg)
2200 if not nimg.os_fail:
2201 if refos_img is None:
2203 self._VerifyNodeOS(node_i, nimg, refos_img)
2205 feedback_fn("* Verifying instance status")
2206 for instance in instancelist:
2208 feedback_fn("* Verifying instance %s" % instance)
2209 inst_config = instanceinfo[instance]
2210 self._VerifyInstance(instance, inst_config, node_image)
2211 inst_nodes_offline = []
2213 pnode = inst_config.primary_node
2214 pnode_img = node_image[pnode]
2215 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2216 self.ENODERPC, pnode, "instance %s, connection to"
2217 " primary node failed", instance)
2219 if pnode_img.offline:
2220 inst_nodes_offline.append(pnode)
2222 # If the instance is non-redundant we cannot survive losing its primary
2223 # node, so we are not N+1 compliant. On the other hand we have no disk
2224 # templates with more than one secondary, so that situation is not well supported either.
2226 # FIXME: does not support file-backed instances
2227 if not inst_config.secondary_nodes:
2228 i_non_redundant.append(instance)
2229 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2230 instance, "instance has multiple secondary nodes: %s",
2231 utils.CommaJoin(inst_config.secondary_nodes),
2232 code=self.ETYPE_WARNING)
2234 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2235 i_non_a_balanced.append(instance)
2237 for snode in inst_config.secondary_nodes:
2238 s_img = node_image[snode]
2239 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2240 "instance %s, connection to secondary node failed", instance)
2243 inst_nodes_offline.append(snode)
2245 # warn that the instance lives on offline nodes
2246 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2247 "instance lives on offline node(s) %s",
2248 utils.CommaJoin(inst_nodes_offline))
2249 # ... or ghost nodes
2250 for node in inst_config.all_nodes:
2251 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2252 "instance lives on ghost node %s", node)
2254 feedback_fn("* Verifying orphan volumes")
2255 reserved = utils.FieldSet(*cluster.reserved_lvs)
2256 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2258 feedback_fn("* Verifying orphan instances")
2259 self._VerifyOrphanInstances(instancelist, node_image)
2261 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2262 feedback_fn("* Verifying N+1 Memory redundancy")
2263 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2265 feedback_fn("* Other Notes")
2267 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2268 % len(i_non_redundant))
2270 if i_non_a_balanced:
2271 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2272 % len(i_non_a_balanced))
2275 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2278 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2282 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2283 """Analyze the post-hooks' result
2285 This method analyses the hook result, handles it, and sends some
2286 nicely-formatted feedback back to the user.
2288 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2289 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2290 @param hooks_results: the results of the multi-node hooks rpc call
2291 @param feedback_fn: function used to send feedback back to the caller
2292 @param lu_result: previous Exec result
2293 @return: the new Exec result, based on the previous result
2297 # We only really run POST phase hooks, and are only interested in their results
2299 if phase == constants.HOOKS_PHASE_POST:
2300 # Used to change hooks' output to proper indentation
2301 indent_re = re.compile('^', re.M)
2302 feedback_fn("* Hooks Results")
2303 assert hooks_results, "invalid result from hooks"
2305 for node_name in hooks_results:
2306 res = hooks_results[node_name]
2308 test = msg and not res.offline
2309 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2310 "Communication failure in hooks execution: %s", msg)
2311 if res.offline or msg:
2312 # No need to investigate payload if node is offline or gave an error.
2313 # manually override lu_result here, as _ErrorIf only
2314 # overrides self.bad
2317 for script, hkr, output in res.payload:
2318 test = hkr == constants.HKR_FAIL
2319 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2320 "Script %s failed, output:", script)
2322 output = indent_re.sub(' ', output)
2323 feedback_fn("%s" % output)
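# Illustrative sketch (hypothetical output): indent_re matches the start of
# every line (re.M), so a multi-line hook output such as "ok\nmissing bridge"
# is re-emitted through feedback_fn with each line indented under the
# per-script error message.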
2329 class LUVerifyDisks(NoHooksLU):
2330 """Verifies the cluster disks status.
2335 def ExpandNames(self):
2336 self.needed_locks = {
2337 locking.LEVEL_NODE: locking.ALL_SET,
2338 locking.LEVEL_INSTANCE: locking.ALL_SET,
2340 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2342 def Exec(self, feedback_fn):
2343 """Verify integrity of cluster disks.
2345 @rtype: tuple of three items
2346 @return: a tuple of (dict of node-to-node_error, list of instances
2347 which need activate-disks, dict of instance: (node, volume) for
2351 result = res_nodes, res_instances, res_missing = {}, [], {}
2353 vg_name = self.cfg.GetVGName()
2354 nodes = utils.NiceSort(self.cfg.GetNodeList())
2355 instances = [self.cfg.GetInstanceInfo(name)
2356 for name in self.cfg.GetInstanceList()]
2359 for inst in instances:
2361 if (not inst.admin_up or
2362 inst.disk_template not in constants.DTS_NET_MIRROR):
2364 inst.MapLVsByNode(inst_lvs)
2365 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2366 for node, vol_list in inst_lvs.iteritems():
2367 for vol in vol_list:
2368 nv_dict[(node, vol)] = inst
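# Illustrative sketch (hypothetical names) of the transformation above:
#   inst_lvs == {"node1.example.com": ["xenvg/lv-data", "xenvg/lv-meta"]}
# for an instance `inst` becomes
#   nv_dict == {("node1.example.com", "xenvg/lv-data"): inst,
#               ("node1.example.com", "xenvg/lv-meta"): inst}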
2373 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2377 node_res = node_lvs[node]
2378 if node_res.offline:
2380 msg = node_res.fail_msg
2382 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2383 res_nodes[node] = msg
2386 lvs = node_res.payload
2387 for lv_name, (_, _, lv_online) in lvs.items():
2388 inst = nv_dict.pop((node, lv_name), None)
2389 if (not lv_online and inst is not None
2390 and inst.name not in res_instances):
2391 res_instances.append(inst.name)
2393 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2395 for key, inst in nv_dict.iteritems():
2396 if inst.name not in res_missing:
2397 res_missing[inst.name] = []
2398 res_missing[inst.name].append(key)
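# Illustrative sketch (hypothetical data) of the result returned by this LU:
#   ({"node3.example.com": "rpc failure"},   # per-node errors
#    ["instance1.example.com"],              # instances needing activate-disks
#    {"instance2.example.com": [("node1.example.com", "xenvg/lv-data")]})
#                                            # instances with missing volumes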
2403 class LURepairDiskSizes(NoHooksLU):
2404 """Verifies the cluster disks sizes.
2407 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2410 def ExpandNames(self):
2411 if self.op.instances:
2412 self.wanted_names = []
2413 for name in self.op.instances:
2414 full_name = _ExpandInstanceName(self.cfg, name)
2415 self.wanted_names.append(full_name)
2416 self.needed_locks = {
2417 locking.LEVEL_NODE: [],
2418 locking.LEVEL_INSTANCE: self.wanted_names,
2420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2422 self.wanted_names = None
2423 self.needed_locks = {
2424 locking.LEVEL_NODE: locking.ALL_SET,
2425 locking.LEVEL_INSTANCE: locking.ALL_SET,
2427 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2429 def DeclareLocks(self, level):
2430 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2431 self._LockInstancesNodes(primary_only=True)
2433 def CheckPrereq(self):
2434 """Check prerequisites.
2436 This only checks the optional instance list against the existing names.
2439 if self.wanted_names is None:
2440 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2442 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2443 in self.wanted_names]
2445 def _EnsureChildSizes(self, disk):
2446 """Ensure children of the disk have the needed disk size.
2448 This is valid mainly for DRBD8 and fixes an issue where the
2449 children have smaller disk size.
2451 @param disk: an L{ganeti.objects.Disk} object
2454 if disk.dev_type == constants.LD_DRBD8:
2455 assert disk.children, "Empty children for DRBD8?"
2456 fchild = disk.children[0]
2457 mismatch = fchild.size < disk.size
2459 self.LogInfo("Child disk has size %d, parent %d, fixing",
2460 fchild.size, disk.size)
2461 fchild.size = disk.size
2463 # and we recurse on this child only, not on the metadev
2464 return self._EnsureChildSizes(fchild) or mismatch
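# Illustrative sketch (hypothetical sizes): for a DRBD8 disk of size 10240
# whose data child was recorded with size 10048, the method above logs the
# mismatch, bumps the child size to 10240 and returns True, so the caller
# knows the instance configuration has to be written out again.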
2468 def Exec(self, feedback_fn):
2469 """Verify the size of cluster disks.
2472 # TODO: check child disks too
2473 # TODO: check differences in size between primary/secondary nodes
2475 for instance in self.wanted_instances:
2476 pnode = instance.primary_node
2477 if pnode not in per_node_disks:
2478 per_node_disks[pnode] = []
2479 for idx, disk in enumerate(instance.disks):
2480 per_node_disks[pnode].append((instance, idx, disk))
2483 for node, dskl in per_node_disks.items():
2484 newl = [v[2].Copy() for v in dskl]
2486 self.cfg.SetDiskID(dsk, node)
2487 result = self.rpc.call_blockdev_getsizes(node, newl)
2489 self.LogWarning("Failure in blockdev_getsizes call to node"
2490 " %s, ignoring", node)
2492 if len(result.data) != len(dskl):
2493 self.LogWarning("Invalid result from node %s, ignoring node results",
2496 for ((instance, idx, disk), size) in zip(dskl, result.data):
2498 self.LogWarning("Disk %d of instance %s did not return size"
2499 " information, ignoring", idx, instance.name)
2501 if not isinstance(size, (int, long)):
2502 self.LogWarning("Disk %d of instance %s did not return valid"
2503 " size information, ignoring", idx, instance.name)
2506 if size != disk.size:
2507 self.LogInfo("Disk %d of instance %s has mismatched size,"
2508 " correcting: recorded %d, actual %d", idx,
2509 instance.name, disk.size, size)
2511 self.cfg.Update(instance, feedback_fn)
2512 changed.append((instance.name, idx, size))
2513 if self._EnsureChildSizes(disk):
2514 self.cfg.Update(instance, feedback_fn)
2515 changed.append((instance.name, idx, disk.size))
2519 class LURenameCluster(LogicalUnit):
2520 """Rename the cluster.
2523 HPATH = "cluster-rename"
2524 HTYPE = constants.HTYPE_CLUSTER
2525 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2527 def BuildHooksEnv(self):
2532 "OP_TARGET": self.cfg.GetClusterName(),
2533 "NEW_NAME": self.op.name,
2535 mn = self.cfg.GetMasterNode()
2536 all_nodes = self.cfg.GetNodeList()
2537 return env, [mn], all_nodes
2539 def CheckPrereq(self):
2540 """Verify that the passed name is a valid one.
2543 hostname = netutils.GetHostInfo(self.op.name)
2545 new_name = hostname.name
2546 self.ip = new_ip = hostname.ip
2547 old_name = self.cfg.GetClusterName()
2548 old_ip = self.cfg.GetMasterIP()
2549 if new_name == old_name and new_ip == old_ip:
2550 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2551 " cluster has changed",
2553 if new_ip != old_ip:
2554 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2555 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2556 " reachable on the network. Aborting." %
2557 new_ip, errors.ECODE_NOTUNIQUE)
2559 self.op.name = new_name
2561 def Exec(self, feedback_fn):
2562 """Rename the cluster.
2565 clustername = self.op.name
2568 # shutdown the master IP
2569 master = self.cfg.GetMasterNode()
2570 result = self.rpc.call_node_stop_master(master, False)
2571 result.Raise("Could not disable the master role")
2574 cluster = self.cfg.GetClusterInfo()
2575 cluster.cluster_name = clustername
2576 cluster.master_ip = ip
2577 self.cfg.Update(cluster, feedback_fn)
2579 # update the known hosts file
2580 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2581 node_list = self.cfg.GetNodeList()
2583 node_list.remove(master)
2586 result = self.rpc.call_upload_file(node_list,
2587 constants.SSH_KNOWN_HOSTS_FILE)
2588 for to_node, to_result in result.iteritems():
2589 msg = to_result.fail_msg
2591 msg = ("Copy of file %s to node %s failed: %s" %
2592 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2593 self.proc.LogWarning(msg)
2596 result = self.rpc.call_node_start_master(master, False, False)
2597 msg = result.fail_msg
2599 self.LogWarning("Could not re-enable the master role on"
2600 " the master, please restart manually: %s", msg)
2605 class LUSetClusterParams(LogicalUnit):
2606 """Change the parameters of the cluster.
2609 HPATH = "cluster-modify"
2610 HTYPE = constants.HTYPE_CLUSTER
2612 ("vg_name", None, _TMaybeString),
2613 ("enabled_hypervisors", None,
2614 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2615 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2616 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2617 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2618 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2619 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2620 ("uid_pool", None, _NoType),
2621 ("add_uids", None, _NoType),
2622 ("remove_uids", None, _NoType),
2623 ("maintain_node_health", None, _TMaybeBool),
2624 ("nicparams", None, _TOr(_TDict, _TNone)),
2625 ("drbd_helper", None, _TOr(_TString, _TNone)),
2626 ("default_iallocator", None, _TMaybeString),
2627 ("reserved_lvs", None, _TOr(_TListOf(_TNonEmptyString), _TNone)),
2628 ("hidden_oss", None, _TOr(_TListOf(\
2631 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2633 ("blacklisted_oss", None, _TOr(_TListOf(\
2636 _TMap(lambda v: v[0], _TElemOf(constants.DDMS_VALUES)))),
2641 def CheckArguments(self):
2645 if self.op.uid_pool:
2646 uidpool.CheckUidPool(self.op.uid_pool)
2648 if self.op.add_uids:
2649 uidpool.CheckUidPool(self.op.add_uids)
2651 if self.op.remove_uids:
2652 uidpool.CheckUidPool(self.op.remove_uids)
2654 def ExpandNames(self):
2655 # FIXME: in the future maybe other cluster params won't require checking on
2656 # all nodes to be modified.
2657 self.needed_locks = {
2658 locking.LEVEL_NODE: locking.ALL_SET,
2660 self.share_locks[locking.LEVEL_NODE] = 1
2662 def BuildHooksEnv(self):
2667 "OP_TARGET": self.cfg.GetClusterName(),
2668 "NEW_VG_NAME": self.op.vg_name,
2670 mn = self.cfg.GetMasterNode()
2671 return env, [mn], [mn]
2673 def CheckPrereq(self):
2674 """Check prerequisites.
2676 This checks that the given parameters do not conflict and that
2677 the given volume group is valid.
2680 if self.op.vg_name is not None and not self.op.vg_name:
2681 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2682 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2683 " instances exist", errors.ECODE_INVAL)
2685 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2686 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2687 raise errors.OpPrereqError("Cannot disable drbd helper while"
2688 " drbd-based instances exist",
2691 node_list = self.acquired_locks[locking.LEVEL_NODE]
2693 # if vg_name is not None, check the given volume group on all nodes
2695 vglist = self.rpc.call_vg_list(node_list)
2696 for node in node_list:
2697 msg = vglist[node].fail_msg
2699 # ignoring down node
2700 self.LogWarning("Error while gathering data on node %s"
2701 " (ignoring node): %s", node, msg)
2703 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2705 constants.MIN_VG_SIZE)
2707 raise errors.OpPrereqError("Error on node '%s': %s" %
2708 (node, vgstatus), errors.ECODE_ENVIRON)
2710 if self.op.drbd_helper:
2711 # checks given drbd helper on all nodes
2712 helpers = self.rpc.call_drbd_helper(node_list)
2713 for node in node_list:
2714 ninfo = self.cfg.GetNodeInfo(node)
2716 self.LogInfo("Not checking drbd helper on offline node %s", node)
2718 msg = helpers[node].fail_msg
2720 raise errors.OpPrereqError("Error checking drbd helper on node"
2721 " '%s': %s" % (node, msg),
2722 errors.ECODE_ENVIRON)
2723 node_helper = helpers[node].payload
2724 if node_helper != self.op.drbd_helper:
2725 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2726 (node, node_helper), errors.ECODE_ENVIRON)
2728 self.cluster = cluster = self.cfg.GetClusterInfo()
2729 # validate params changes
2730 if self.op.beparams:
2731 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2732 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2734 if self.op.nicparams:
2735 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2736 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2737 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2740 # check all instances for consistency
2741 for instance in self.cfg.GetAllInstancesInfo().values():
2742 for nic_idx, nic in enumerate(instance.nics):
2743 params_copy = copy.deepcopy(nic.nicparams)
2744 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2746 # check parameter syntax
2748 objects.NIC.CheckParameterSyntax(params_filled)
2749 except errors.ConfigurationError, err:
2750 nic_errors.append("Instance %s, nic/%d: %s" %
2751 (instance.name, nic_idx, err))
2753 # if we're moving instances to routed, check that they have an ip
2754 target_mode = params_filled[constants.NIC_MODE]
2755 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2756 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2757 (instance.name, nic_idx))
2759 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2760 "\n".join(nic_errors))
2762 # hypervisor list/parameters
2763 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2764 if self.op.hvparams:
2765 for hv_name, hv_dict in self.op.hvparams.items():
2766 if hv_name not in self.new_hvparams:
2767 self.new_hvparams[hv_name] = hv_dict
2769 self.new_hvparams[hv_name].update(hv_dict)
2771 # os hypervisor parameters
2772 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2774 for os_name, hvs in self.op.os_hvp.items():
2775 if os_name not in self.new_os_hvp:
2776 self.new_os_hvp[os_name] = hvs
2778 for hv_name, hv_dict in hvs.items():
2779 if hv_name not in self.new_os_hvp[os_name]:
2780 self.new_os_hvp[os_name][hv_name] = hv_dict
2782 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2785 self.new_osp = objects.FillDict(cluster.osparams, {})
2786 if self.op.osparams:
2787 for os_name, osp in self.op.osparams.items():
2788 if os_name not in self.new_osp:
2789 self.new_osp[os_name] = {}
2791 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2794 if not self.new_osp[os_name]:
2795 # we removed all parameters
2796 del self.new_osp[os_name]
2798 # check the parameter validity (remote check)
2799 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2800 os_name, self.new_osp[os_name])
2802 # changes to the hypervisor list
2803 if self.op.enabled_hypervisors is not None:
2804 self.hv_list = self.op.enabled_hypervisors
2805 for hv in self.hv_list:
2806 # if the hypervisor doesn't already exist in the cluster
2807 # hvparams, we initialize it to empty, and then (in both
2808 # cases) we make sure to fill the defaults, as we might not
2809 # have a complete defaults list if the hypervisor wasn't enabled before
2811 if hv not in new_hvp:
2813 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2814 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2816 self.hv_list = cluster.enabled_hypervisors
2818 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2819 # either the enabled list has changed, or the parameters have, validate
2820 for hv_name, hv_params in self.new_hvparams.items():
2821 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2822 (self.op.enabled_hypervisors and
2823 hv_name in self.op.enabled_hypervisors)):
2824 # either this is a new hypervisor, or its parameters have changed
2825 hv_class = hypervisor.GetHypervisor(hv_name)
2826 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827 hv_class.CheckParameterSyntax(hv_params)
2828 _CheckHVParams(self, node_list, hv_name, hv_params)
2831 # no need to check any newly-enabled hypervisors, since the
2832 # defaults have already been checked in the above code-block
2833 for os_name, os_hvp in self.new_os_hvp.items():
2834 for hv_name, hv_params in os_hvp.items():
2835 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2836 # we need to fill in the new os_hvp on top of the actual hv_p
2837 cluster_defaults = self.new_hvparams.get(hv_name, {})
2838 new_osp = objects.FillDict(cluster_defaults, hv_params)
2839 hv_class = hypervisor.GetHypervisor(hv_name)
2840 hv_class.CheckParameterSyntax(new_osp)
2841 _CheckHVParams(self, node_list, hv_name, new_osp)
2843 if self.op.default_iallocator:
2844 alloc_script = utils.FindFile(self.op.default_iallocator,
2845 constants.IALLOCATOR_SEARCH_PATH,
2847 if alloc_script is None:
2848 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2849 " specified" % self.op.default_iallocator,
2852 def Exec(self, feedback_fn):
2853 """Change the parameters of the cluster.
2856 if self.op.vg_name is not None:
2857 new_volume = self.op.vg_name
2860 if new_volume != self.cfg.GetVGName():
2861 self.cfg.SetVGName(new_volume)
2863 feedback_fn("Cluster LVM configuration already in desired"
2864 " state, not changing")
2865 if self.op.drbd_helper is not None:
2866 new_helper = self.op.drbd_helper
2869 if new_helper != self.cfg.GetDRBDHelper():
2870 self.cfg.SetDRBDHelper(new_helper)
2872 feedback_fn("Cluster DRBD helper already in desired state,"
2874 if self.op.hvparams:
2875 self.cluster.hvparams = self.new_hvparams
2877 self.cluster.os_hvp = self.new_os_hvp
2878 if self.op.enabled_hypervisors is not None:
2879 self.cluster.hvparams = self.new_hvparams
2880 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2881 if self.op.beparams:
2882 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2883 if self.op.nicparams:
2884 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2885 if self.op.osparams:
2886 self.cluster.osparams = self.new_osp
2888 if self.op.candidate_pool_size is not None:
2889 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2890 # we need to update the pool size here, otherwise the save will fail
2891 _AdjustCandidatePool(self, [])
2893 if self.op.maintain_node_health is not None:
2894 self.cluster.maintain_node_health = self.op.maintain_node_health
2896 if self.op.add_uids is not None:
2897 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2899 if self.op.remove_uids is not None:
2900 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2902 if self.op.uid_pool is not None:
2903 self.cluster.uid_pool = self.op.uid_pool
2905 if self.op.default_iallocator is not None:
2906 self.cluster.default_iallocator = self.op.default_iallocator
2908 if self.op.reserved_lvs is not None:
2909 self.cluster.reserved_lvs = self.op.reserved_lvs
2911 def helper_oss(aname, mods, desc):
2912 lst = getattr(self.cluster, aname)
2913 for key, val in mods:
2914 if key == constants.DDM_ADD:
2916 feedback_fn("OS %s already in %s, ignoring", val, desc)
2919 elif key == constants.DDM_REMOVE:
2923 feedback_fn("OS %s not found in %s, ignoring", val, desc)
2925 raise errors.ProgrammerError("Invalid modification '%s'" % key)
2927 if self.op.hidden_oss:
2928 helper_oss("hidden_oss", self.op.hidden_oss,
2931 if self.op.blacklisted_oss:
2932 helper_oss("blacklisted_oss", self.op.blacklisted_oss,
2933 "blacklisted OS list")
2935 self.cfg.Update(self.cluster, feedback_fn)
2938 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2939 """Distribute additional files which are part of the cluster configuration.
2941 ConfigWriter takes care of distributing the config and ssconf files, but
2942 there are more files which should be distributed to all nodes. This function
2943 makes sure those are copied.
2945 @param lu: calling logical unit
2946 @param additional_nodes: list of nodes not in the config to distribute to
2949 # 1. Gather target nodes
2950 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2951 dist_nodes = lu.cfg.GetOnlineNodeList()
2952 if additional_nodes is not None:
2953 dist_nodes.extend(additional_nodes)
2954 if myself.name in dist_nodes:
2955 dist_nodes.remove(myself.name)
2957 # 2. Gather files to distribute
2958 dist_files = set([constants.ETC_HOSTS,
2959 constants.SSH_KNOWN_HOSTS_FILE,
2960 constants.RAPI_CERT_FILE,
2961 constants.RAPI_USERS_FILE,
2962 constants.CONFD_HMAC_KEY,
2963 constants.CLUSTER_DOMAIN_SECRET_FILE,
2966 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2967 for hv_name in enabled_hypervisors:
2968 hv_class = hypervisor.GetHypervisor(hv_name)
2969 dist_files.update(hv_class.GetAncillaryFiles())
2971 # 3. Perform the files upload
2972 for fname in dist_files:
2973 if os.path.exists(fname):
2974 result = lu.rpc.call_upload_file(dist_nodes, fname)
2975 for to_node, to_result in result.items():
2976 msg = to_result.fail_msg
2978 msg = ("Copy of file %s to node %s failed: %s" %
2979 (fname, to_node, msg))
2980 lu.proc.LogWarning(msg)
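# Illustrative sketch: typical invocations from an LU, as used elsewhere in
# this module, are
#   _RedistributeAncillaryFiles(self)
# or, when a brand-new node is not yet part of the configuration,
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])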
2983 class LURedistributeConfig(NoHooksLU):
2984 """Force the redistribution of cluster configuration.
2986 This is a very simple LU.
2991 def ExpandNames(self):
2992 self.needed_locks = {
2993 locking.LEVEL_NODE: locking.ALL_SET,
2995 self.share_locks[locking.LEVEL_NODE] = 1
2997 def Exec(self, feedback_fn):
2998 """Redistribute the configuration.
3001 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3002 _RedistributeAncillaryFiles(self)
3005 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3006 """Sleep and poll for an instance's disk to sync.
3009 if not instance.disks or disks is not None and not disks:
3012 disks = _ExpandCheckDisks(instance, disks)
3015 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3017 node = instance.primary_node
3020 lu.cfg.SetDiskID(dev, node)
3022 # TODO: Convert to utils.Retry
3025 degr_retries = 10 # in seconds, as we sleep 1 second each time
3029 cumul_degraded = False
3030 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3031 msg = rstats.fail_msg
3033 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3036 raise errors.RemoteError("Can't contact node %s for mirror data,"
3037 " aborting." % node)
3040 rstats = rstats.payload
3042 for i, mstat in enumerate(rstats):
3044 lu.LogWarning("Can't compute data for node %s/%s",
3045 node, disks[i].iv_name)
3048 cumul_degraded = (cumul_degraded or
3049 (mstat.is_degraded and mstat.sync_percent is None))
3050 if mstat.sync_percent is not None:
3052 if mstat.estimated_time is not None:
3053 rem_time = ("%s remaining (estimated)" %
3054 utils.FormatSeconds(mstat.estimated_time))
3055 max_time = mstat.estimated_time
3057 rem_time = "no time estimate"
3058 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3059 (disks[i].iv_name, mstat.sync_percent, rem_time))
3061 # if we're done but degraded, let's do a few small retries, to
3062 # make sure we see a stable and not transient situation; therefore
3063 # we force restart of the loop
3064 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3065 logging.info("Degraded disks found, %d retries left", degr_retries)
3073 time.sleep(min(60, max_time))
3076 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3077 return not cumul_degraded
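# Illustrative sketch: callers are assumed to use the return value as, e.g.,
#   disk_abort = not _WaitForSync(lu, instance)
# i.e. True means all mirrors finished syncing without staying degraded.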
3080 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3081 """Check that mirrors are not degraded.
3083 The ldisk parameter, if True, will change the test from the
3084 is_degraded attribute (which represents overall non-ok status for
3085 the device(s)) to the ldisk (representing the local storage status).
3088 lu.cfg.SetDiskID(dev, node)
3092 if on_primary or dev.AssembleOnSecondary():
3093 rstats = lu.rpc.call_blockdev_find(node, dev)
3094 msg = rstats.fail_msg
3096 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3098 elif not rstats.payload:
3099 lu.LogWarning("Can't find disk on node %s", node)
3103 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3105 result = result and not rstats.payload.is_degraded
3108 for child in dev.children:
3109 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3114 class LUDiagnoseOS(NoHooksLU):
3115 """Logical unit for OS diagnose/query.
3120 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3124 _BLK = "blacklisted"
3126 _FIELDS_STATIC = utils.FieldSet()
3127 _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3128 "parameters", "api_versions", _HID, _BLK)
3130 def CheckArguments(self):
3132 raise errors.OpPrereqError("Selective OS query not supported",
3135 _CheckOutputFields(static=self._FIELDS_STATIC,
3136 dynamic=self._FIELDS_DYNAMIC,
3137 selected=self.op.output_fields)
3139 def ExpandNames(self):
3140 # Lock all nodes, in shared mode
3141 # Temporary removal of locks, should be reverted later
3142 # TODO: reintroduce locks when they are lighter-weight
3143 self.needed_locks = {}
3144 #self.share_locks[locking.LEVEL_NODE] = 1
3145 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3148 def _DiagnoseByOS(rlist):
3149 """Remaps a per-node return list into an a per-os per-node dictionary
3151 @param rlist: a map with node names as keys and OS objects as values
3154 @return: a dictionary with OS names as keys and, as values, another
3155 map with nodes as keys and lists of (path, status, diagnose,
3156 variants, parameters, api_versions) tuples as values, e.g.::
3158 {"debian-etch": {"node1": [("/usr/lib/...", True, "", [], [], []),
3159 ("/srv/...", False, "invalid api", [], [], [])],
3160 "node2": [("/srv/...", True, "", [], [], [])]}}
3165 # we build here the list of nodes that didn't fail the RPC (at RPC
3166 # level), so that nodes with a non-responding node daemon don't
3167 # make all OSes invalid
3168 good_nodes = [node_name for node_name in rlist
3169 if not rlist[node_name].fail_msg]
3170 for node_name, nr in rlist.items():
3171 if nr.fail_msg or not nr.payload:
3173 for (name, path, status, diagnose, variants,
3174 params, api_versions) in nr.payload:
3175 if name not in all_os:
3176 # build a list of nodes for this os containing empty lists
3177 # for each node in node_list
3179 for nname in good_nodes:
3180 all_os[name][nname] = []
3181 # convert params from [name, help] to (name, help)
3182 params = [tuple(v) for v in params]
3183 all_os[name][node_name].append((path, status, diagnose,
3184 variants, params, api_versions))
3187 def Exec(self, feedback_fn):
3188 """Compute the list of OSes.
3191 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3192 node_data = self.rpc.call_os_diagnose(valid_nodes)
3193 pol = self._DiagnoseByOS(node_data)
3195 cluster = self.cfg.GetClusterInfo()
3197 for os_name, os_data in pol.items():
3200 (variants, params, api_versions) = null_state = (set(), set(), set())
3201 for idx, osl in enumerate(os_data.values()):
3202 valid = bool(valid and osl and osl[0][1])
3204 (variants, params, api_versions) = null_state
3206 node_variants, node_params, node_api = osl[0][3:6]
3207 if idx == 0: # first entry
3208 variants = set(node_variants)
3209 params = set(node_params)
3210 api_versions = set(node_api)
3211 else: # keep consistency
3212 variants.intersection_update(node_variants)
3213 params.intersection_update(node_params)
3214 api_versions.intersection_update(node_api)
3216 is_hid = os_name in cluster.hidden_oss
3217 is_blk = os_name in cluster.blacklisted_oss
3218 if ((self._HID not in self.op.output_fields and is_hid) or
3219 (self._BLK not in self.op.output_fields and is_blk) or
3220 (self._VLD not in self.op.output_fields and not valid)):
3223 for field in self.op.output_fields:
3226 elif field == self._VLD:
3228 elif field == "node_status":
3229 # this is just a copy of the dict
3231 for node_name, nos_list in os_data.items():
3232 val[node_name] = nos_list
3233 elif field == "variants":
3234 val = list(variants)
3235 elif field == "parameters":
3237 elif field == "api_versions":
3238 val = list(api_versions)
3239 elif field == self._HID:
3241 elif field == self._BLK:
3244 raise errors.ParameterError(field)
3251 class LURemoveNode(LogicalUnit):
3252 """Logical unit for removing a node.
3255 HPATH = "node-remove"
3256 HTYPE = constants.HTYPE_NODE
3261 def BuildHooksEnv(self):
3264 This doesn't run on the target node in the pre phase as a failed
3265 node would then be impossible to remove.
3269 "OP_TARGET": self.op.node_name,
3270 "NODE_NAME": self.op.node_name,
3272 all_nodes = self.cfg.GetNodeList()
3274 all_nodes.remove(self.op.node_name)
3276 logging.warning("Node %s which is about to be removed not found"
3277 " in the all nodes list", self.op.node_name)
3278 return env, all_nodes, all_nodes
3280 def CheckPrereq(self):
3281 """Check prerequisites.
3284 - the node exists in the configuration
3285 - it does not have primary or secondary instances
3286 - it's not the master
3288 Any errors are signaled by raising errors.OpPrereqError.
3291 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3292 node = self.cfg.GetNodeInfo(self.op.node_name)
3293 assert node is not None
3295 instance_list = self.cfg.GetInstanceList()
3297 masternode = self.cfg.GetMasterNode()
3298 if node.name == masternode:
3299 raise errors.OpPrereqError("Node is the master node,"
3300 " you need to failover first.",
3303 for instance_name in instance_list:
3304 instance = self.cfg.GetInstanceInfo(instance_name)
3305 if node.name in instance.all_nodes:
3306 raise errors.OpPrereqError("Instance %s is still running on the node,"
3307 " please remove first." % instance_name,
3309 self.op.node_name = node.name
3312 def Exec(self, feedback_fn):
3313 """Removes the node from the cluster.
3317 logging.info("Stopping the node daemon and removing configs from node %s",
3320 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3322 # Promote nodes to master candidate as needed
3323 _AdjustCandidatePool(self, exceptions=[node.name])
3324 self.context.RemoveNode(node.name)
3326 # Run post hooks on the node before it's removed
3327 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3329 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3331 # pylint: disable-msg=W0702
3332 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3334 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3335 msg = result.fail_msg
3337 self.LogWarning("Errors encountered on the remote node while leaving"
3338 " the cluster: %s", msg)
3340 # Remove node from our /etc/hosts
3341 if self.cfg.GetClusterInfo().modify_etc_hosts:
3342 # FIXME: this should be done via an rpc call to node daemon
3343 utils.RemoveHostFromEtcHosts(node.name)
3344 _RedistributeAncillaryFiles(self)
3347 class LUQueryNodes(NoHooksLU):
3348 """Logical unit for querying nodes.
3351 # pylint: disable-msg=W0142
3354 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3355 ("use_locking", False, _TBool),
3359 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3360 "master_candidate", "offline", "drained"]
3362 _FIELDS_DYNAMIC = utils.FieldSet(
3364 "mtotal", "mnode", "mfree",
3366 "ctotal", "cnodes", "csockets",
3369 _FIELDS_STATIC = utils.FieldSet(*[
3370 "pinst_cnt", "sinst_cnt",
3371 "pinst_list", "sinst_list",
3372 "pip", "sip", "tags",
3374 "role"] + _SIMPLE_FIELDS
3377 def CheckArguments(self):
3378 _CheckOutputFields(static=self._FIELDS_STATIC,
3379 dynamic=self._FIELDS_DYNAMIC,
3380 selected=self.op.output_fields)
3382 def ExpandNames(self):
3383 self.needed_locks = {}
3384 self.share_locks[locking.LEVEL_NODE] = 1
3387 self.wanted = _GetWantedNodes(self, self.op.names)
3389 self.wanted = locking.ALL_SET
3391 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3392 self.do_locking = self.do_node_query and self.op.use_locking
3394 # if we don't request only static fields, we need to lock the nodes
3395 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3397 def Exec(self, feedback_fn):
3398 """Computes the list of nodes and their attributes.
3401 all_info = self.cfg.GetAllNodesInfo()
3403 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3404 elif self.wanted != locking.ALL_SET:
3405 nodenames = self.wanted
3406 missing = set(nodenames).difference(all_info.keys())
3408 raise errors.OpExecError(
3409 "Some nodes were removed before retrieving their data: %s" % missing)
3411 nodenames = all_info.keys()
3413 nodenames = utils.NiceSort(nodenames)
3414 nodelist = [all_info[name] for name in nodenames]
3416 # begin data gathering
3418 if self.do_node_query:
3420 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3421 self.cfg.GetHypervisorType())
3422 for name in nodenames:
3423 nodeinfo = node_data[name]
3424 if not nodeinfo.fail_msg and nodeinfo.payload:
3425 nodeinfo = nodeinfo.payload
3426 fn = utils.TryConvert
3428 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3429 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3430 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3431 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3432 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3433 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3434 "bootid": nodeinfo.get('bootid', None),
3435 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3436 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3439 live_data[name] = {}
3441 live_data = dict.fromkeys(nodenames, {})
3443 node_to_primary = dict([(name, set()) for name in nodenames])
3444 node_to_secondary = dict([(name, set()) for name in nodenames])
3446 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3447 "sinst_cnt", "sinst_list"))
3448 if inst_fields & frozenset(self.op.output_fields):
3449 inst_data = self.cfg.GetAllInstancesInfo()
3451 for inst in inst_data.values():
3452 if inst.primary_node in node_to_primary:
3453 node_to_primary[inst.primary_node].add(inst.name)
3454 for secnode in inst.secondary_nodes:
3455 if secnode in node_to_secondary:
3456 node_to_secondary[secnode].add(inst.name)
3458 master_node = self.cfg.GetMasterNode()
3460 # end data gathering
3463 for node in nodelist:
3465 for field in self.op.output_fields:
3466 if field in self._SIMPLE_FIELDS:
3467 val = getattr(node, field)
3468 elif field == "pinst_list":
3469 val = list(node_to_primary[node.name])
3470 elif field == "sinst_list":
3471 val = list(node_to_secondary[node.name])
3472 elif field == "pinst_cnt":
3473 val = len(node_to_primary[node.name])
3474 elif field == "sinst_cnt":
3475 val = len(node_to_secondary[node.name])
3476 elif field == "pip":
3477 val = node.primary_ip
3478 elif field == "sip":
3479 val = node.secondary_ip
3480 elif field == "tags":
3481 val = list(node.GetTags())
3482 elif field == "master":
3483 val = node.name == master_node
3484 elif self._FIELDS_DYNAMIC.Matches(field):
3485 val = live_data[node.name].get(field, None)
3486 elif field == "role":
3487 if node.name == master_node:
3489 elif node.master_candidate:
3498 raise errors.ParameterError(field)
3499 node_output.append(val)
3500 output.append(node_output)
3505 class LUQueryNodeVolumes(NoHooksLU):
3506 """Logical unit for getting volumes on node(s).
3510 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3511 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3514 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3515 _FIELDS_STATIC = utils.FieldSet("node")
3517 def CheckArguments(self):
3518 _CheckOutputFields(static=self._FIELDS_STATIC,
3519 dynamic=self._FIELDS_DYNAMIC,
3520 selected=self.op.output_fields)
3522 def ExpandNames(self):
3523 self.needed_locks = {}
3524 self.share_locks[locking.LEVEL_NODE] = 1
3525 if not self.op.nodes:
3526 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3528 self.needed_locks[locking.LEVEL_NODE] = \
3529 _GetWantedNodes(self, self.op.nodes)
3531 def Exec(self, feedback_fn):
3532 """Computes the list of nodes and their attributes.
3535 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3536 volumes = self.rpc.call_node_volumes(nodenames)
3538 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3539 in self.cfg.GetInstanceList()]
3541 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
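# Illustrative sketch (hypothetical names): lv_by_node maps each instance
# object to its per-node LV layout, e.g.
#   lv_by_node[inst] == {"node1.example.com": ["xenvg/lv-data"]}
# which is used below to attribute every volume a node reports to its owner.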
3544 for node in nodenames:
3545 nresult = volumes[node]
3548 msg = nresult.fail_msg
3550 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3553 node_vols = nresult.payload[:]
3554 node_vols.sort(key=lambda vol: vol['dev'])
3556 for vol in node_vols:
3558 for field in self.op.output_fields:
3561 elif field == "phys":
3565 elif field == "name":
3567 elif field == "size":
3568 val = int(float(vol['size']))
3569 elif field == "instance":
3571 if node not in lv_by_node[inst]:
3573 if vol['name'] in lv_by_node[inst][node]:
3579 raise errors.ParameterError(field)
3580 node_output.append(str(val))
3582 output.append(node_output)
3587 class LUQueryNodeStorage(NoHooksLU):
3588 """Logical unit for getting information on storage units on node(s).
3591 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3593 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3594 ("storage_type", _NoDefault, _CheckStorageType),
3595 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3596 ("name", None, _TMaybeString),
3600 def CheckArguments(self):
3601 _CheckOutputFields(static=self._FIELDS_STATIC,
3602 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3603 selected=self.op.output_fields)
3605 def ExpandNames(self):
3606 self.needed_locks = {}
3607 self.share_locks[locking.LEVEL_NODE] = 1
3610 self.needed_locks[locking.LEVEL_NODE] = \
3611 _GetWantedNodes(self, self.op.nodes)
3613 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3615 def Exec(self, feedback_fn):
3616 """Computes the list of nodes and their attributes.
3619 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3621 # Always get name to sort by
3622 if constants.SF_NAME in self.op.output_fields:
3623 fields = self.op.output_fields[:]
3625 fields = [constants.SF_NAME] + self.op.output_fields
3627 # Never ask for node or type as it's only known to the LU
3628 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3629 while extra in fields:
3630 fields.remove(extra)
3632 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3633 name_idx = field_idx[constants.SF_NAME]
3635 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3636 data = self.rpc.call_storage_list(self.nodes,
3637 self.op.storage_type, st_args,
3638 self.op.name, fields)
3642 for node in utils.NiceSort(self.nodes):
3643 nresult = data[node]
3647 msg = nresult.fail_msg
3649 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3652 rows = dict([(row[name_idx], row) for row in nresult.payload])
3654 for name in utils.NiceSort(rows.keys()):
3659 for field in self.op.output_fields:
3660 if field == constants.SF_NODE:
3662 elif field == constants.SF_TYPE:
3663 val = self.op.storage_type
3664 elif field in field_idx:
3665 val = row[field_idx[field]]
3667 raise errors.ParameterError(field)
3676 class LUModifyNodeStorage(NoHooksLU):
3677 """Logical unit for modifying a storage volume on a node.
3682 ("storage_type", _NoDefault, _CheckStorageType),
3683 ("name", _NoDefault, _TNonEmptyString),
3684 ("changes", _NoDefault, _TDict),
3688 def CheckArguments(self):
3689 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3691 storage_type = self.op.storage_type
3694 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3696 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3697 " modified" % storage_type,
3700 diff = set(self.op.changes.keys()) - modifiable
3702 raise errors.OpPrereqError("The following fields can not be modified for"
3703 " storage units of type '%s': %r" %
3704 (storage_type, list(diff)),
3707 def ExpandNames(self):
3708 self.needed_locks = {
3709 locking.LEVEL_NODE: self.op.node_name,
3712 def Exec(self, feedback_fn):
3713 """Computes the list of nodes and their attributes.
3716 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3717 result = self.rpc.call_storage_modify(self.op.node_name,
3718 self.op.storage_type, st_args,
3719 self.op.name, self.op.changes)
3720 result.Raise("Failed to modify storage unit '%s' on %s" %
3721 (self.op.name, self.op.node_name))
3724 class LUAddNode(LogicalUnit):
3725 """Logical unit for adding node to the cluster.
3729 HTYPE = constants.HTYPE_NODE
3732 ("primary_ip", None, _NoType),
3733 ("secondary_ip", None, _TMaybeString),
3734 ("readd", False, _TBool),
3737 def CheckArguments(self):
3738 # validate/normalize the node name
3739 self.op.node_name = netutils.HostInfo.NormalizeName(self.op.node_name)
3741 def BuildHooksEnv(self):
3744 This will run on all nodes before, and on all nodes + the new node after.
3748 "OP_TARGET": self.op.node_name,
3749 "NODE_NAME": self.op.node_name,
3750 "NODE_PIP": self.op.primary_ip,
3751 "NODE_SIP": self.op.secondary_ip,
3753 nodes_0 = self.cfg.GetNodeList()
3754 nodes_1 = nodes_0 + [self.op.node_name, ]
3755 return env, nodes_0, nodes_1
3757 def CheckPrereq(self):
3758 """Check prerequisites.
3761 - the new node is not already in the config
3763 - its parameters (single/dual homed) match the cluster
3765 Any errors are signaled by raising errors.OpPrereqError.
3768 node_name = self.op.node_name
3771 dns_data = netutils.GetHostInfo(node_name)
3773 node = dns_data.name
3774 primary_ip = self.op.primary_ip = dns_data.ip
3775 if self.op.secondary_ip is None:
3776 self.op.secondary_ip = primary_ip
3777 if not netutils.IsValidIP4(self.op.secondary_ip):
3778 raise errors.OpPrereqError("Invalid secondary IP given",
3780 secondary_ip = self.op.secondary_ip
3782 node_list = cfg.GetNodeList()
3783 if not self.op.readd and node in node_list:
3784 raise errors.OpPrereqError("Node %s is already in the configuration" %
3785 node, errors.ECODE_EXISTS)
3786 elif self.op.readd and node not in node_list:
3787 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3790 self.changed_primary_ip = False
3792 for existing_node_name in node_list:
3793 existing_node = cfg.GetNodeInfo(existing_node_name)
3795 if self.op.readd and node == existing_node_name:
3796 if existing_node.secondary_ip != secondary_ip:
3797 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3798 " address configuration as before",
3800 if existing_node.primary_ip != primary_ip:
3801 self.changed_primary_ip = True
3805 if (existing_node.primary_ip == primary_ip or
3806 existing_node.secondary_ip == primary_ip or
3807 existing_node.primary_ip == secondary_ip or
3808 existing_node.secondary_ip == secondary_ip):
3809 raise errors.OpPrereqError("New node ip address(es) conflict with"
3810 " existing node %s" % existing_node.name,
3811 errors.ECODE_NOTUNIQUE)
3813 # check that the type of the node (single versus dual homed) is the
3814 # same as for the master
3815 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3816 master_singlehomed = myself.secondary_ip == myself.primary_ip
3817 newbie_singlehomed = secondary_ip == primary_ip
3818 if master_singlehomed != newbie_singlehomed:
3819 if master_singlehomed:
3820 raise errors.OpPrereqError("The master has no private ip but the"
3821 " new node has one",
3824 raise errors.OpPrereqError("The master has a private ip but the"
3825 " new node doesn't have one",
3828 # checks reachability
3829 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3830 raise errors.OpPrereqError("Node not reachable by ping",
3831 errors.ECODE_ENVIRON)
3833 if not newbie_singlehomed:
3834 # check reachability from my secondary ip to newbie's secondary ip
3835 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3836 source=myself.secondary_ip):
3837 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3838 " based ping to noded port",
3839 errors.ECODE_ENVIRON)
3846 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3849 self.new_node = self.cfg.GetNodeInfo(node)
3850 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3852 self.new_node = objects.Node(name=node,
3853 primary_ip=primary_ip,
3854 secondary_ip=secondary_ip,
3855 master_candidate=self.master_candidate,
3856 offline=False, drained=False)
3858 def Exec(self, feedback_fn):
3859 """Adds the new node to the cluster.
3862 new_node = self.new_node
3863 node = new_node.name
3865 # for re-adds, reset the offline/drained/master-candidate flags;
3866 # we need to reset here, otherwise offline would prevent RPC calls
3867 # later in the procedure; this also means that if the re-add
3868 # fails, we are left with a non-offlined, broken node
3870 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3871 self.LogInfo("Readding a node, the offline/drained flags were reset")
3872 # if we demote the node, we do cleanup later in the procedure
3873 new_node.master_candidate = self.master_candidate
3874 if self.changed_primary_ip:
3875 new_node.primary_ip = self.op.primary_ip
3877 # notify the user about any possible mc promotion
3878 if new_node.master_candidate:
3879 self.LogInfo("Node will be a master candidate")
3881 # check connectivity
3882 result = self.rpc.call_version([node])[node]
3883 result.Raise("Can't get version information from node %s" % node)
3884 if constants.PROTOCOL_VERSION == result.payload:
3885 logging.info("Communication to node %s fine, sw version %s match",
3886 node, result.payload)
3888 raise errors.OpExecError("Version mismatch master version %s,"
3889 " node version %s" %
3890 (constants.PROTOCOL_VERSION, result.payload))
3893 if self.cfg.GetClusterInfo().modify_ssh_setup:
3894 logging.info("Copy ssh key to node %s", node)
3895 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3897 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3898 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3902 keyarray.append(utils.ReadFile(i))
3904 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3905 keyarray[2], keyarray[3], keyarray[4],
3907 result.Raise("Cannot transfer ssh keys to the new node")
3909 # Add node to our /etc/hosts, and add key to known_hosts
3910 if self.cfg.GetClusterInfo().modify_etc_hosts:
3911 # FIXME: this should be done via an rpc call to node daemon
3912 utils.AddHostToEtcHosts(new_node.name)
3914 if new_node.secondary_ip != new_node.primary_ip:
3915 result = self.rpc.call_node_has_ip_address(new_node.name,
3916 new_node.secondary_ip)
3917 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3918 prereq=True, ecode=errors.ECODE_ENVIRON)
3919 if not result.payload:
3920 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3921 " you gave (%s). Please fix and re-run this"
3922 " command." % new_node.secondary_ip)
3924 node_verify_list = [self.cfg.GetMasterNode()]
3925 node_verify_param = {
3926 constants.NV_NODELIST: [node],
3927 # TODO: do a node-net-test as well?
3930 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3931 self.cfg.GetClusterName())
3932 for verifier in node_verify_list:
3933 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3934 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3936 for failed in nl_payload:
3937 feedback_fn("ssh/hostname verification failed"
3938 " (checking from %s): %s" %
3939 (verifier, nl_payload[failed]))
3940 raise errors.OpExecError("ssh/hostname verification failed.")
3943 _RedistributeAncillaryFiles(self)
3944 self.context.ReaddNode(new_node)
3945 # make sure we redistribute the config
3946 self.cfg.Update(new_node, feedback_fn)
3947 # and make sure the new node will not have old files around
3948 if not new_node.master_candidate:
3949 result = self.rpc.call_node_demote_from_mc(new_node.name)
3950 msg = result.fail_msg
3952 self.LogWarning("Node failed to demote itself from master"
3953 " candidate status: %s" % msg)
3955 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3956 self.context.AddNode(new_node, self.proc.GetECId())
3959 class LUSetNodeParams(LogicalUnit):
3960 """Modifies the parameters of a node.
3963 HPATH = "node-modify"
3964 HTYPE = constants.HTYPE_NODE
3967 ("master_candidate", None, _TMaybeBool),
3968 ("offline", None, _TMaybeBool),
3969 ("drained", None, _TMaybeBool),
3970 ("auto_promote", False, _TBool),
3975 def CheckArguments(self):
3976 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3977 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3978 if all_mods.count(None) == 3:
3979 raise errors.OpPrereqError("Please pass at least one modification",
3981 if all_mods.count(True) > 1:
3982 raise errors.OpPrereqError("Can't set the node into more than one"
3983 " state at the same time",
3986 # Boolean value that tells us whether we're offlining or draining the node
3987 self.offline_or_drain = (self.op.offline == True or
3988 self.op.drained == True)
3989 self.deoffline_or_drain = (self.op.offline == False or
3990 self.op.drained == False)
3991 self.might_demote = (self.op.master_candidate == False or
3992 self.offline_or_drain)
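# demoting this node may leave the cluster short of master candidates;
# with auto_promote we lock all nodes so that another node can be
# promoted in its place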
3994 self.lock_all = self.op.auto_promote and self.might_demote
3997 def ExpandNames(self):
3999 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4001 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4003 def BuildHooksEnv(self):
4006 This runs on the master node.
4010 "OP_TARGET": self.op.node_name,
4011 "MASTER_CANDIDATE": str(self.op.master_candidate),
4012 "OFFLINE": str(self.op.offline),
4013 "DRAINED": str(self.op.drained),
4015 nl = [self.cfg.GetMasterNode(),
4019 def CheckPrereq(self):
4020 """Check prerequisites.
4022 This checks the requested flag changes against the current node and cluster state.
4025 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4027 if (self.op.master_candidate is not None or
4028 self.op.drained is not None or
4029 self.op.offline is not None):
4030 # we can't change the master's node flags
4031 if self.op.node_name == self.cfg.GetMasterNode():
4032 raise errors.OpPrereqError("The master role can be changed"
4033 " only via master-failover",
4037 if node.master_candidate and self.might_demote and not self.lock_all:
4038 assert not self.op.auto_promote, "auto-promote set but lock_all not"
4039 # check if, after removing the current node, we would be short of master candidates
4041 (mc_remaining, mc_should, _) = \
4042 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4043 if mc_remaining < mc_should:
4044 raise errors.OpPrereqError("Not enough master candidates, please"
4045 " pass auto_promote to allow promotion",
4048 if (self.op.master_candidate == True and
4049 ((node.offline and not self.op.offline == False) or
4050 (node.drained and not self.op.drained == False))):
4051 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
4052 " to master_candidate" % node.name,
4055 # If the node is being de-offlined or un-drained, promote it to master candidate if needed
4056 if (self.deoffline_or_drain and not self.offline_or_drain and not
4057 self.op.master_candidate == True and not node.master_candidate):
4058 self.op.master_candidate = _DecideSelfPromotion(self)
4059 if self.op.master_candidate:
4060 self.LogInfo("Autopromoting node to master candidate")
4064 def Exec(self, feedback_fn):
4073 if self.op.offline is not None:
4074 node.offline = self.op.offline
4075 result.append(("offline", str(self.op.offline)))
4076 if self.op.offline == True:
4077 if node.master_candidate:
4078 node.master_candidate = False
4080 result.append(("master_candidate", "auto-demotion due to offline"))
4082 node.drained = False
4083 result.append(("drained", "clear drained status due to offline"))
4085 if self.op.master_candidate is not None:
4086 node.master_candidate = self.op.master_candidate
4088 result.append(("master_candidate", str(self.op.master_candidate)))
4089 if self.op.master_candidate == False:
4090 rrc = self.rpc.call_node_demote_from_mc(node.name)
4093 self.LogWarning("Node failed to demote itself: %s" % msg)
4095 if self.op.drained is not None:
4096 node.drained = self.op.drained
4097 result.append(("drained", str(self.op.drained)))
4098 if self.op.drained == True:
4099 if node.master_candidate:
4100 node.master_candidate = False
4102 result.append(("master_candidate", "auto-demotion due to drain"))
4103 rrc = self.rpc.call_node_demote_from_mc(node.name)
4106 self.LogWarning("Node failed to demote itself: %s" % msg)
4108 node.offline = False
4109 result.append(("offline", "clear offline status due to drain"))
4111 # if we locked all nodes, adjust the candidate pool before updating this node
4113 _AdjustCandidatePool(self, [node.name])
4115 # this will trigger configuration file update, if needed
4116 self.cfg.Update(node, feedback_fn)
4118 # this will trigger job queue propagation or cleanup
4120 self.context.ReaddNode(node)
4125 class LUPowercycleNode(NoHooksLU):
4126 """Powercycles a node.
4135 def CheckArguments(self):
4136 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4137 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4138 raise errors.OpPrereqError("The node is the master and the force"
4139 " parameter was not set",
4142 def ExpandNames(self):
4143 """Locking for PowercycleNode.
4145 This is a last-resort option and shouldn't block on other
4146 jobs. Therefore, we grab no locks.
4149 self.needed_locks = {}
4151 def Exec(self, feedback_fn):
4155 result = self.rpc.call_node_powercycle(self.op.node_name,
4156 self.cfg.GetHypervisorType())
4157 result.Raise("Failed to schedule the reboot")
4158 return result.payload
4161 class LUQueryClusterInfo(NoHooksLU):
4162 """Query cluster configuration.
4167 def ExpandNames(self):
4168 self.needed_locks = {}
4170 def Exec(self, feedback_fn):
4171 """Return cluster config.
4174 cluster = self.cfg.GetClusterInfo()
4177 # Filter just for enabled hypervisors
4178 for os_name, hv_dict in cluster.os_hvp.items():
4179 os_hvp[os_name] = {}
4180 for hv_name, hv_params in hv_dict.items():
4181 if hv_name in cluster.enabled_hypervisors:
4182 os_hvp[os_name][hv_name] = hv_params
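# assemble the static and configuration-derived cluster values returned
# to the caller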
4185 "software_version": constants.RELEASE_VERSION,
4186 "protocol_version": constants.PROTOCOL_VERSION,
4187 "config_version": constants.CONFIG_VERSION,
4188 "os_api_version": max(constants.OS_API_VERSIONS),
4189 "export_version": constants.EXPORT_VERSION,
4190 "architecture": (platform.architecture()[0], platform.machine()),
4191 "name": cluster.cluster_name,
4192 "master": cluster.master_node,
4193 "default_hypervisor": cluster.enabled_hypervisors[0],
4194 "enabled_hypervisors": cluster.enabled_hypervisors,
4195 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4196 for hypervisor_name in cluster.enabled_hypervisors]),
4198 "beparams": cluster.beparams,
4199 "osparams": cluster.osparams,
4200 "nicparams": cluster.nicparams,
4201 "candidate_pool_size": cluster.candidate_pool_size,
4202 "master_netdev": cluster.master_netdev,
4203 "volume_group_name": cluster.volume_group_name,
4204 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4205 "file_storage_dir": cluster.file_storage_dir,
4206 "maintain_node_health": cluster.maintain_node_health,
4207 "ctime": cluster.ctime,
4208 "mtime": cluster.mtime,
4209 "uuid": cluster.uuid,
4210 "tags": list(cluster.GetTags()),
4211 "uid_pool": cluster.uid_pool,
4212 "default_iallocator": cluster.default_iallocator,
4213 "reserved_lvs": cluster.reserved_lvs,
4219 class LUQueryConfigValues(NoHooksLU):
4220 """Return configuration values.
4223 _OP_PARAMS = [_POutputFields]
4225 _FIELDS_DYNAMIC = utils.FieldSet()
4226 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4229 def CheckArguments(self):
4230 _CheckOutputFields(static=self._FIELDS_STATIC,
4231 dynamic=self._FIELDS_DYNAMIC,
4232 selected=self.op.output_fields)
4234 def ExpandNames(self):
4235 self.needed_locks = {}
4237 def Exec(self, feedback_fn):
4238 """Dump a representation of the cluster config to the standard output.
4242 for field in self.op.output_fields:
4243 if field == "cluster_name":
4244 entry = self.cfg.GetClusterName()
4245 elif field == "master_node":
4246 entry = self.cfg.GetMasterNode()
4247 elif field == "drain_flag":
4248 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4249 elif field == "watcher_pause":
4250 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4252 raise errors.ParameterError(field)
4253 values.append(entry)
4257 class LUActivateInstanceDisks(NoHooksLU):
4258 """Bring up an instance's disks.
4263 ("ignore_size", False, _TBool),
4267 def ExpandNames(self):
4268 self._ExpandAndLockInstance()
4269 self.needed_locks[locking.LEVEL_NODE] = []
4270 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4272 def DeclareLocks(self, level):
4273 if level == locking.LEVEL_NODE:
4274 self._LockInstancesNodes()
4276 def CheckPrereq(self):
4277 """Check prerequisites.
4279 This checks that the instance is in the cluster.
4282 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4283 assert self.instance is not None, \
4284 "Cannot retrieve locked instance %s" % self.op.instance_name
4285 _CheckNodeOnline(self, self.instance.primary_node)
4287 def Exec(self, feedback_fn):
4288 """Activate the disks.
4291 disks_ok, disks_info = \
4292 _AssembleInstanceDisks(self, self.instance,
4293 ignore_size=self.op.ignore_size)
4295 raise errors.OpExecError("Cannot activate block devices")
4300 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4302 """Prepare the block devices for an instance.
4304 This sets up the block devices on all nodes.
4306 @type lu: L{LogicalUnit}
4307 @param lu: the logical unit on whose behalf we execute
4308 @type instance: L{objects.Instance}
4309 @param instance: the instance for whose disks we assemble
4310 @type disks: list of L{objects.Disk} or None
4311 @param disks: which disks to assemble (or all, if None)
4312 @type ignore_secondaries: boolean
4313 @param ignore_secondaries: if true, errors on secondary nodes
4314 won't result in an error return from the function
4315 @type ignore_size: boolean
4316 @param ignore_size: if true, the current known size of the disk
4317 will not be used during the disk activation, useful for cases
4318 when the size is wrong
4319 @return: a tuple (disks_ok, device_info); disks_ok is False if the
4320 operation failed, and device_info is a list of (host,
4321 instance_visible_name, node_visible_name) tuples mapping node devices to instance devices
4326 iname = instance.name
4327 disks = _ExpandCheckDisks(instance, disks)
4329 # With the two-pass mechanism we try to reduce the window of
4330 # opportunity for the race condition of switching DRBD to primary
4331 # before handshaking has occurred, but we do not eliminate it
4333 # The proper fix would be to wait (with some limits) until the
4334 # connection has been made and drbd transitions from WFConnection
4335 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
4338 # 1st pass, assemble on all nodes in secondary mode
4339 for inst_disk in disks:
4340 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4342 node_disk = node_disk.Copy()
4343 node_disk.UnsetSize()
4344 lu.cfg.SetDiskID(node_disk, node)
4345 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4346 msg = result.fail_msg
4348 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4349 " (is_primary=False, pass=1): %s",
4350 inst_disk.iv_name, node, msg)
4351 if not ignore_secondaries:
4354 # FIXME: race condition on drbd migration to primary
4356 # 2nd pass, do only the primary node
4357 for inst_disk in disks:
4360 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4361 if node != instance.primary_node:
4364 node_disk = node_disk.Copy()
4365 node_disk.UnsetSize()
4366 lu.cfg.SetDiskID(node_disk, node)
4367 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4368 msg = result.fail_msg
4370 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4371 " (is_primary=True, pass=2): %s",
4372 inst_disk.iv_name, node, msg)
4375 dev_path = result.payload
4377 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4379 # leave the disks configured for the primary node
4380 # this is a workaround that would be fixed better by
4381 # improving the logical/physical id handling
4383 lu.cfg.SetDiskID(disk, instance.primary_node)
4385 return disks_ok, device_info
4388 def _StartInstanceDisks(lu, instance, force):
4389 """Start the disks of an instance.
4392 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4393 ignore_secondaries=force)
4395 _ShutdownInstanceDisks(lu, instance)
4396 if force is not None and not force:
4397 lu.proc.LogWarning("", hint="If the message above refers to a"
4399 " you can retry the operation using '--force'.")
4400 raise errors.OpExecError("Disk consistency error")
4403 class LUDeactivateInstanceDisks(NoHooksLU):
4404 """Shutdown an instance's disks.
4412 def ExpandNames(self):
4413 self._ExpandAndLockInstance()
4414 self.needed_locks[locking.LEVEL_NODE] = []
4415 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4417 def DeclareLocks(self, level):
4418 if level == locking.LEVEL_NODE:
4419 self._LockInstancesNodes()
4421 def CheckPrereq(self):
4422 """Check prerequisites.
4424 This checks that the instance is in the cluster.
4427 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4428 assert self.instance is not None, \
4429 "Cannot retrieve locked instance %s" % self.op.instance_name
4431 def Exec(self, feedback_fn):
4432 """Deactivate the disks
4435 instance = self.instance
4436 _SafeShutdownInstanceDisks(self, instance)
4439 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4440 """Shutdown block devices of an instance.
4442 This function checks that the instance is not running before calling
4443 _ShutdownInstanceDisks.
4446 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4447 _ShutdownInstanceDisks(lu, instance, disks=disks)
4450 def _ExpandCheckDisks(instance, disks):
4451 """Return the instance disks selected by the disks list
4453 @type disks: list of L{objects.Disk} or None
4454 @param disks: selected disks
4455 @rtype: list of L{objects.Disk}
4456 @return: selected instance disks to act on
4460 return instance.disks
4462 if not set(disks).issubset(instance.disks):
4463 raise errors.ProgrammerError("Can only act on disks belonging to the"
4468 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4469 """Shutdown block devices of an instance.
4471 This does the shutdown on all nodes of the instance.
4473 If ignore_primary is false, errors on the primary node are reported as a failure; they are only ignored when ignore_primary is true.
4478 disks = _ExpandCheckDisks(instance, disks)
4481 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4482 lu.cfg.SetDiskID(top_disk, node)
4483 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4484 msg = result.fail_msg
4486 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4487 disk.iv_name, node, msg)
4488 if not ignore_primary or node != instance.primary_node:
4493 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4494 """Checks if a node has enough free memory.
4496 This function checks if a given node has the needed amount of free
4497 memory. In case the node has less memory or we cannot get the
4498 information from the node, this function raises an OpPrereqError.
4501 @type lu: C{LogicalUnit}
4502 @param lu: a logical unit from which we get configuration data
4504 @param node: the node to check
4505 @type reason: C{str}
4506 @param reason: string to use in the error message
4507 @type requested: C{int}
4508 @param requested: the amount of memory in MiB to check for
4509 @type hypervisor_name: C{str}
4510 @param hypervisor_name: the hypervisor to ask for memory stats
4511 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4512 we cannot check the node
4515 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4516 nodeinfo[node].Raise("Can't get data from node %s" % node,
4517 prereq=True, ecode=errors.ECODE_ENVIRON)
4518 free_mem = nodeinfo[node].payload.get('memory_free', None)
4519 if not isinstance(free_mem, int):
4520 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4521 " was '%s'" % (node, free_mem),
4522 errors.ECODE_ENVIRON)
4523 if requested > free_mem:
4524 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4525 " needed %s MiB, available %s MiB" %
4526 (node, reason, requested, free_mem),
4530 def _CheckNodesFreeDisk(lu, nodenames, requested):
4531 """Checks if nodes have enough free disk space in the default VG.
4533 This function checks if all given nodes have the needed amount of
4534 free disk. In case any node has less disk or we cannot get the
4535 information from the node, this function raises an OpPrereqError.
4538 @type lu: C{LogicalUnit}
4539 @param lu: a logical unit from which we get configuration data
4540 @type nodenames: C{list}
4541 @param nodenames: the list of node names to check
4542 @type requested: C{int}
4543 @param requested: the amount of disk in MiB to check for
4544 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4545 we cannot check the node
4548 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4549 lu.cfg.GetHypervisorType())
4550 for node in nodenames:
4551 info = nodeinfo[node]
4552 info.Raise("Cannot get current information from node %s" % node,
4553 prereq=True, ecode=errors.ECODE_ENVIRON)
4554 vg_free = info.payload.get("vg_free", None)
4555 if not isinstance(vg_free, int):
4556 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4557 " result was '%s'" % (node, vg_free),
4558 errors.ECODE_ENVIRON)
4559 if requested > vg_free:
4560 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4561 " required %d MiB, available %d MiB" %
4562 (node, requested, vg_free),
4566 class LUStartupInstance(LogicalUnit):
4567 """Starts an instance.
4570 HPATH = "instance-start"
4571 HTYPE = constants.HTYPE_INSTANCE
4575 ("hvparams", _EmptyDict, _TDict),
4576 ("beparams", _EmptyDict, _TDict),
4580 def CheckArguments(self):
4582 if self.op.beparams:
4583 # fill the beparams dict
4584 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4586 def ExpandNames(self):
4587 self._ExpandAndLockInstance()
4589 def BuildHooksEnv(self):
4592 This runs on master, primary and secondary nodes of the instance.
4596 "FORCE": self.op.force,
4598 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4599 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4602 def CheckPrereq(self):
4603 """Check prerequisites.
4605 This checks that the instance is in the cluster.
4608 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4609 assert self.instance is not None, \
4610 "Cannot retrieve locked instance %s" % self.op.instance_name
4613 if self.op.hvparams:
4614 # check hypervisor parameter syntax (locally)
4615 cluster = self.cfg.GetClusterInfo()
4616 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4617 filled_hvp = cluster.FillHV(instance)
4618 filled_hvp.update(self.op.hvparams)
4619 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4620 hv_type.CheckParameterSyntax(filled_hvp)
4621 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4623 _CheckNodeOnline(self, instance.primary_node)
4625 bep = self.cfg.GetClusterInfo().FillBE(instance)
4626 # check bridges existence
4627 _CheckInstanceBridgesExist(self, instance)
4629 remote_info = self.rpc.call_instance_info(instance.primary_node,
4631 instance.hypervisor)
4632 remote_info.Raise("Error checking node %s" % instance.primary_node,
4633 prereq=True, ecode=errors.ECODE_ENVIRON)
4634 if not remote_info.payload: # not running already
4635 _CheckNodeFreeMemory(self, instance.primary_node,
4636 "starting instance %s" % instance.name,
4637 bep[constants.BE_MEMORY], instance.hypervisor)
4639 def Exec(self, feedback_fn):
4640 """Start the instance.
4643 instance = self.instance
4644 force = self.op.force
4646 self.cfg.MarkInstanceUp(instance.name)
4648 node_current = instance.primary_node
4650 _StartInstanceDisks(self, instance, force)
4652 result = self.rpc.call_instance_start(node_current, instance,
4653 self.op.hvparams, self.op.beparams)
4654 msg = result.fail_msg
4656 _ShutdownInstanceDisks(self, instance)
4657 raise errors.OpExecError("Could not start instance: %s" % msg)
4660 class LURebootInstance(LogicalUnit):
4661 """Reboot an instance.
4664 HPATH = "instance-reboot"
4665 HTYPE = constants.HTYPE_INSTANCE
4668 ("ignore_secondaries", False, _TBool),
4669 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4674 def ExpandNames(self):
4675 self._ExpandAndLockInstance()
4677 def BuildHooksEnv(self):
4680 This runs on master, primary and secondary nodes of the instance.
4684 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4685 "REBOOT_TYPE": self.op.reboot_type,
4686 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4688 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4689 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4692 def CheckPrereq(self):
4693 """Check prerequisites.
4695 This checks that the instance is in the cluster.
4698 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4699 assert self.instance is not None, \
4700 "Cannot retrieve locked instance %s" % self.op.instance_name
4702 _CheckNodeOnline(self, instance.primary_node)
4704 # check bridges existence
4705 _CheckInstanceBridgesExist(self, instance)
4707 def Exec(self, feedback_fn):
4708 """Reboot the instance.
4711 instance = self.instance
4712 ignore_secondaries = self.op.ignore_secondaries
4713 reboot_type = self.op.reboot_type
4715 node_current = instance.primary_node
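# soft and hard reboots are delegated to the hypervisor on the primary
# node; a full reboot is emulated as a shutdown, disk restart and fresh start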
4717 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4718 constants.INSTANCE_REBOOT_HARD]:
4719 for disk in instance.disks:
4720 self.cfg.SetDiskID(disk, node_current)
4721 result = self.rpc.call_instance_reboot(node_current, instance,
4723 self.op.shutdown_timeout)
4724 result.Raise("Could not reboot instance")
4726 result = self.rpc.call_instance_shutdown(node_current, instance,
4727 self.op.shutdown_timeout)
4728 result.Raise("Could not shutdown instance for full reboot")
4729 _ShutdownInstanceDisks(self, instance)
4730 _StartInstanceDisks(self, instance, ignore_secondaries)
4731 result = self.rpc.call_instance_start(node_current, instance, None, None)
4732 msg = result.fail_msg
4734 _ShutdownInstanceDisks(self, instance)
4735 raise errors.OpExecError("Could not start instance for"
4736 " full reboot: %s" % msg)
4738 self.cfg.MarkInstanceUp(instance.name)
4741 class LUShutdownInstance(LogicalUnit):
4742 """Shutdown an instance.
4745 HPATH = "instance-stop"
4746 HTYPE = constants.HTYPE_INSTANCE
4749 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4753 def ExpandNames(self):
4754 self._ExpandAndLockInstance()
4756 def BuildHooksEnv(self):
4759 This runs on master, primary and secondary nodes of the instance.
4762 env = _BuildInstanceHookEnvByObject(self, self.instance)
4763 env["TIMEOUT"] = self.op.timeout
4764 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4767 def CheckPrereq(self):
4768 """Check prerequisites.
4770 This checks that the instance is in the cluster.
4773 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4774 assert self.instance is not None, \
4775 "Cannot retrieve locked instance %s" % self.op.instance_name
4776 _CheckNodeOnline(self, self.instance.primary_node)
4778 def Exec(self, feedback_fn):
4779 """Shutdown the instance.
4782 instance = self.instance
4783 node_current = instance.primary_node
4784 timeout = self.op.timeout
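# record the instance as administratively down in the configuration
# first, so other tools (e.g. the watcher) do not try to restart it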
4785 self.cfg.MarkInstanceDown(instance.name)
4786 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4787 msg = result.fail_msg
4789 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4791 _ShutdownInstanceDisks(self, instance)
4794 class LUReinstallInstance(LogicalUnit):
4795 """Reinstall an instance.
4798 HPATH = "instance-reinstall"
4799 HTYPE = constants.HTYPE_INSTANCE
4802 ("os_type", None, _TMaybeString),
4803 ("force_variant", False, _TBool),
4807 def ExpandNames(self):
4808 self._ExpandAndLockInstance()
4810 def BuildHooksEnv(self):
4813 This runs on master, primary and secondary nodes of the instance.
4816 env = _BuildInstanceHookEnvByObject(self, self.instance)
4817 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4820 def CheckPrereq(self):
4821 """Check prerequisites.
4823 This checks that the instance is in the cluster and is not running.
4826 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4827 assert instance is not None, \
4828 "Cannot retrieve locked instance %s" % self.op.instance_name
4829 _CheckNodeOnline(self, instance.primary_node)
4831 if instance.disk_template == constants.DT_DISKLESS:
4832 raise errors.OpPrereqError("Instance '%s' has no disks" %
4833 self.op.instance_name,
4835 _CheckInstanceDown(self, instance, "cannot reinstall")
4837 if self.op.os_type is not None:
4839 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4840 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4842 self.instance = instance
4844 def Exec(self, feedback_fn):
4845 """Reinstall the instance.
4848 inst = self.instance
4850 if self.op.os_type is not None:
4851 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4852 inst.os = self.op.os_type
4853 self.cfg.Update(inst, feedback_fn)
4855 _StartInstanceDisks(self, inst, None)
4857 feedback_fn("Running the instance OS create scripts...")
4858 # FIXME: pass debug option from opcode to backend
4859 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4860 self.op.debug_level)
4861 result.Raise("Could not install OS for instance %s on node %s" %
4862 (inst.name, inst.primary_node))
4864 _ShutdownInstanceDisks(self, inst)
4867 class LURecreateInstanceDisks(LogicalUnit):
4868 """Recreate an instance's missing disks.
4871 HPATH = "instance-recreate-disks"
4872 HTYPE = constants.HTYPE_INSTANCE
4875 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4879 def ExpandNames(self):
4880 self._ExpandAndLockInstance()
4882 def BuildHooksEnv(self):
4885 This runs on master, primary and secondary nodes of the instance.
4888 env = _BuildInstanceHookEnvByObject(self, self.instance)
4889 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4892 def CheckPrereq(self):
4893 """Check prerequisites.
4895 This checks that the instance is in the cluster and is not running.
4898 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4899 assert instance is not None, \
4900 "Cannot retrieve locked instance %s" % self.op.instance_name
4901 _CheckNodeOnline(self, instance.primary_node)
4903 if instance.disk_template == constants.DT_DISKLESS:
4904 raise errors.OpPrereqError("Instance '%s' has no disks" %
4905 self.op.instance_name, errors.ECODE_INVAL)
4906 _CheckInstanceDown(self, instance, "cannot recreate disks")
4908 if not self.op.disks:
4909 self.op.disks = range(len(instance.disks))
4911 for idx in self.op.disks:
4912 if idx >= len(instance.disks):
4913 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4916 self.instance = instance
4918 def Exec(self, feedback_fn):
4919 """Recreate the disks.
4923 for idx, _ in enumerate(self.instance.disks):
4924 if idx not in self.op.disks: # disk idx has not been passed in
4928 _CreateDisks(self, self.instance, to_skip=to_skip)
4931 class LURenameInstance(LogicalUnit):
4932 """Rename an instance.
4935 HPATH = "instance-rename"
4936 HTYPE = constants.HTYPE_INSTANCE
4939 ("new_name", _NoDefault, _TNonEmptyString),
4940 ("ip_check", False, _TBool),
4941 ("name_check", True, _TBool),
4944 def CheckArguments(self):
4948 if self.op.ip_check and not self.op.name_check:
4949 # TODO: make the ip check more flexible and not depend on the name check
4950 raise errors.OpPrereqError("Cannot do ip check without a name check",
4953 def BuildHooksEnv(self):
4956 This runs on master, primary and secondary nodes of the instance.
4959 env = _BuildInstanceHookEnvByObject(self, self.instance)
4960 env["INSTANCE_NEW_NAME"] = self.op.new_name
4961 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4964 def CheckPrereq(self):
4965 """Check prerequisites.
4967 This checks that the instance is in the cluster and is not running.
4970 self.op.instance_name = _ExpandInstanceName(self.cfg,
4971 self.op.instance_name)
4972 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4973 assert instance is not None
4974 _CheckNodeOnline(self, instance.primary_node)
4975 _CheckInstanceDown(self, instance, "cannot rename")
4976 self.instance = instance
4978 new_name = self.op.new_name
4979 if self.op.name_check:
4980 hostinfo = netutils.HostInfo(netutils.HostInfo.NormalizeName(new_name))
4981 new_name = hostinfo.name
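# with ip_check enabled, make sure the address the new name resolves to
# is not already live on the network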
4982 if (self.op.ip_check and
4983 netutils.TcpPing(hostinfo.ip, constants.DEFAULT_NODED_PORT)):
4984 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4985 (hostinfo.ip, new_name),
4986 errors.ECODE_NOTUNIQUE)
4988 instance_list = self.cfg.GetInstanceList()
4989 if new_name in instance_list:
4990 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4991 new_name, errors.ECODE_EXISTS)
4994 def Exec(self, feedback_fn):
4995 """Reinstall the instance.
4998 inst = self.instance
4999 old_name = inst.name
5001 if inst.disk_template == constants.DT_FILE:
5002 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
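# remember the old file-storage directory; it has to be renamed on the
# primary node once the instance carries its new name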
5004 self.cfg.RenameInstance(inst.name, self.op.new_name)
5005 # Change the instance lock. This is definitely safe while we hold the BGL
5006 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5007 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5009 # re-read the instance from the configuration after rename
5010 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5012 if inst.disk_template == constants.DT_FILE:
5013 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5014 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5015 old_file_storage_dir,
5016 new_file_storage_dir)
5017 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5018 " (but the instance has been renamed in Ganeti)" %
5019 (inst.primary_node, old_file_storage_dir,
5020 new_file_storage_dir))
5022 _StartInstanceDisks(self, inst, None)
5024 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5025 old_name, self.op.debug_level)
5026 msg = result.fail_msg
5028 msg = ("Could not run OS rename script for instance %s on node %s"
5029 " (but the instance has been renamed in Ganeti): %s" %
5030 (inst.name, inst.primary_node, msg))
5031 self.proc.LogWarning(msg)
5033 _ShutdownInstanceDisks(self, inst)
5038 class LURemoveInstance(LogicalUnit):
5039 """Remove an instance.
5042 HPATH = "instance-remove"
5043 HTYPE = constants.HTYPE_INSTANCE
5046 ("ignore_failures", False, _TBool),
5051 def ExpandNames(self):
5052 self._ExpandAndLockInstance()
5053 self.needed_locks[locking.LEVEL_NODE] = []
5054 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5056 def DeclareLocks(self, level):
5057 if level == locking.LEVEL_NODE:
5058 self._LockInstancesNodes()
5060 def BuildHooksEnv(self):
5063 This runs on master, primary and secondary nodes of the instance.
5066 env = _BuildInstanceHookEnvByObject(self, self.instance)
5067 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5068 nl = [self.cfg.GetMasterNode()]
5069 nl_post = list(self.instance.all_nodes) + nl
5070 return env, nl, nl_post
5072 def CheckPrereq(self):
5073 """Check prerequisites.
5075 This checks that the instance is in the cluster.
5078 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5079 assert self.instance is not None, \
5080 "Cannot retrieve locked instance %s" % self.op.instance_name
5082 def Exec(self, feedback_fn):
5083 """Remove the instance.
5086 instance = self.instance
5087 logging.info("Shutting down instance %s on node %s",
5088 instance.name, instance.primary_node)
5090 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5091 self.op.shutdown_timeout)
5092 msg = result.fail_msg
5094 if self.op.ignore_failures:
5095 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5097 raise errors.OpExecError("Could not shutdown instance %s on"
5099 (instance.name, instance.primary_node, msg))
5101 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5104 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5105 """Utility function to remove an instance.
5108 logging.info("Removing block devices for instance %s", instance.name)
5110 if not _RemoveDisks(lu, instance):
5111 if not ignore_failures:
5112 raise errors.OpExecError("Can't remove instance's disks")
5113 feedback_fn("Warning: can't remove instance's disks")
5115 logging.info("Removing instance %s out of cluster config", instance.name)
5117 lu.cfg.RemoveInstance(instance.name)
5119 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5120 "Instance lock removal conflict"
5122 # Remove lock for the instance
5123 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5126 class LUQueryInstances(NoHooksLU):
5127 """Logical unit for querying instances.
5130 # pylint: disable-msg=W0142
5132 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
5133 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
5134 ("use_locking", False, _TBool),
5137 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5138 "serial_no", "ctime", "mtime", "uuid"]
5139 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5141 "disk_template", "ip", "mac", "bridge",
5142 "nic_mode", "nic_link",
5143 "sda_size", "sdb_size", "vcpus", "tags",
5144 "network_port", "beparams",
5145 r"(disk)\.(size)/([0-9]+)",
5146 r"(disk)\.(sizes)", "disk_usage",
5147 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5148 r"(nic)\.(bridge)/([0-9]+)",
5149 r"(nic)\.(macs|ips|modes|links|bridges)",
5150 r"(disk|nic)\.(count)",
5152 ] + _SIMPLE_FIELDS +
5154 for name in constants.HVS_PARAMETERS
5155 if name not in constants.HVC_GLOBALS] +
5157 for name in constants.BES_PARAMETERS])
5158 _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5164 def CheckArguments(self):
5165 _CheckOutputFields(static=self._FIELDS_STATIC,
5166 dynamic=self._FIELDS_DYNAMIC,
5167 selected=self.op.output_fields)
5169 def ExpandNames(self):
5170 self.needed_locks = {}
5171 self.share_locks[locking.LEVEL_INSTANCE] = 1
5172 self.share_locks[locking.LEVEL_NODE] = 1
5175 self.wanted = _GetWantedInstances(self, self.op.names)
5177 self.wanted = locking.ALL_SET
5179 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5180 self.do_locking = self.do_node_query and self.op.use_locking
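# locking (and thus live node queries) is only needed when non-static
# fields were requested and the caller asked for locking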
5182 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5183 self.needed_locks[locking.LEVEL_NODE] = []
5184 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5186 def DeclareLocks(self, level):
5187 if level == locking.LEVEL_NODE and self.do_locking:
5188 self._LockInstancesNodes()
5190 def Exec(self, feedback_fn):
5191 """Computes the list of nodes and their attributes.
5194 # pylint: disable-msg=R0912
5195 # way too many branches here
5196 all_info = self.cfg.GetAllInstancesInfo()
5197 if self.wanted == locking.ALL_SET:
5198 # caller didn't specify instance names, so ordering is not important
5200 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5202 instance_names = all_info.keys()
5203 instance_names = utils.NiceSort(instance_names)
5205 # caller did specify names, so we must keep the ordering
5207 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5209 tgt_set = all_info.keys()
5210 missing = set(self.wanted).difference(tgt_set)
5212 raise errors.OpExecError("Some instances were removed before"
5213 " retrieving their data: %s" % missing)
5214 instance_names = self.wanted
5216 instance_list = [all_info[iname] for iname in instance_names]
5218 # begin data gathering
5220 nodes = frozenset([inst.primary_node for inst in instance_list])
5221 hv_list = list(set([inst.hypervisor for inst in instance_list]))
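# live data has to be fetched from the primary nodes; each node is
# queried once for the state of all its instances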
5225 if self.do_node_query:
5227 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5229 result = node_data[name]
5231 # offline nodes will be in both lists
5232 off_nodes.append(name)
5234 bad_nodes.append(name)
5237 live_data.update(result.payload)
5238 # else no instance is alive
5240 live_data = dict([(name, {}) for name in instance_names])
5242 # end data gathering
5247 cluster = self.cfg.GetClusterInfo()
5248 for instance in instance_list:
5250 i_hv = cluster.FillHV(instance, skip_globals=True)
5251 i_be = cluster.FillBE(instance)
5252 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
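# using the filled hypervisor/backend/NIC parameter dicts above, compute
# each requested output field for this instance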
5253 for field in self.op.output_fields:
5254 st_match = self._FIELDS_STATIC.Matches(field)
5255 if field in self._SIMPLE_FIELDS:
5256 val = getattr(instance, field)
5257 elif field == "pnode":
5258 val = instance.primary_node
5259 elif field == "snodes":
5260 val = list(instance.secondary_nodes)
5261 elif field == "admin_state":
5262 val = instance.admin_up
5263 elif field == "oper_state":
5264 if instance.primary_node in bad_nodes:
5267 val = bool(live_data.get(instance.name))
5268 elif field == "status":
5269 if instance.primary_node in off_nodes:
5270 val = "ERROR_nodeoffline"
5271 elif instance.primary_node in bad_nodes:
5272 val = "ERROR_nodedown"
5274 running = bool(live_data.get(instance.name))
5276 if instance.admin_up:
5281 if instance.admin_up:
5285 elif field == "oper_ram":
5286 if instance.primary_node in bad_nodes:
5288 elif instance.name in live_data:
5289 val = live_data[instance.name].get("memory", "?")
5292 elif field == "oper_vcpus":
5293 if instance.primary_node in bad_nodes:
5295 elif instance.name in live_data:
5296 val = live_data[instance.name].get("vcpus", "?")
5299 elif field == "vcpus":
5300 val = i_be[constants.BE_VCPUS]
5301 elif field == "disk_template":
5302 val = instance.disk_template
5305 val = instance.nics[0].ip
5308 elif field == "nic_mode":
5310 val = i_nicp[0][constants.NIC_MODE]
5313 elif field == "nic_link":
5315 val = i_nicp[0][constants.NIC_LINK]
5318 elif field == "bridge":
5319 if (instance.nics and
5320 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5321 val = i_nicp[0][constants.NIC_LINK]
5324 elif field == "mac":
5326 val = instance.nics[0].mac
5329 elif field == "sda_size" or field == "sdb_size":
5330 idx = ord(field[2]) - ord('a')
5332 val = instance.FindDisk(idx).size
5333 except errors.OpPrereqError:
5335 elif field == "disk_usage": # total disk usage per node
5336 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5337 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5338 elif field == "tags":
5339 val = list(instance.GetTags())
5340 elif field == "hvparams":
5342 elif (field.startswith(HVPREFIX) and
5343 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5344 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5345 val = i_hv.get(field[len(HVPREFIX):], None)
5346 elif field == "beparams":
5348 elif (field.startswith(BEPREFIX) and
5349 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5350 val = i_be.get(field[len(BEPREFIX):], None)
5351 elif st_match and st_match.groups():
5352 # matches a variable list
5353 st_groups = st_match.groups()
5354 if st_groups and st_groups[0] == "disk":
5355 if st_groups[1] == "count":
5356 val = len(instance.disks)
5357 elif st_groups[1] == "sizes":
5358 val = [disk.size for disk in instance.disks]
5359 elif st_groups[1] == "size":
5361 val = instance.FindDisk(st_groups[2]).size
5362 except errors.OpPrereqError:
5365 assert False, "Unhandled disk parameter"
5366 elif st_groups[0] == "nic":
5367 if st_groups[1] == "count":
5368 val = len(instance.nics)
5369 elif st_groups[1] == "macs":
5370 val = [nic.mac for nic in instance.nics]
5371 elif st_groups[1] == "ips":
5372 val = [nic.ip for nic in instance.nics]
5373 elif st_groups[1] == "modes":
5374 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5375 elif st_groups[1] == "links":
5376 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5377 elif st_groups[1] == "bridges":
5380 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5381 val.append(nicp[constants.NIC_LINK])
5386 nic_idx = int(st_groups[2])
5387 if nic_idx >= len(instance.nics):
5390 if st_groups[1] == "mac":
5391 val = instance.nics[nic_idx].mac
5392 elif st_groups[1] == "ip":
5393 val = instance.nics[nic_idx].ip
5394 elif st_groups[1] == "mode":
5395 val = i_nicp[nic_idx][constants.NIC_MODE]
5396 elif st_groups[1] == "link":
5397 val = i_nicp[nic_idx][constants.NIC_LINK]
5398 elif st_groups[1] == "bridge":
5399 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5400 if nic_mode == constants.NIC_MODE_BRIDGED:
5401 val = i_nicp[nic_idx][constants.NIC_LINK]
5405 assert False, "Unhandled NIC parameter"
5407 assert False, ("Declared but unhandled variable parameter '%s'" %
5410 assert False, "Declared but unhandled parameter '%s'" % field
5417 class LUFailoverInstance(LogicalUnit):
5418 """Failover an instance.
5421 HPATH = "instance-failover"
5422 HTYPE = constants.HTYPE_INSTANCE
5425 ("ignore_consistency", False, _TBool),
5430 def ExpandNames(self):
5431 self._ExpandAndLockInstance()
5432 self.needed_locks[locking.LEVEL_NODE] = []
5433 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5435 def DeclareLocks(self, level):
5436 if level == locking.LEVEL_NODE:
5437 self._LockInstancesNodes()
5439 def BuildHooksEnv(self):
5442 This runs on master, primary and secondary nodes of the instance.
5445 instance = self.instance
5446 source_node = instance.primary_node
5447 target_node = instance.secondary_nodes[0]
5449 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5450 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5451 "OLD_PRIMARY": source_node,
5452 "OLD_SECONDARY": target_node,
5453 "NEW_PRIMARY": target_node,
5454 "NEW_SECONDARY": source_node,
5456 env.update(_BuildInstanceHookEnvByObject(self, instance))
5457 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5459 nl_post.append(source_node)
5460 return env, nl, nl_post
5462 def CheckPrereq(self):
5463 """Check prerequisites.
5465 This checks that the instance is in the cluster.
5468 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5469 assert self.instance is not None, \
5470 "Cannot retrieve locked instance %s" % self.op.instance_name
5472 bep = self.cfg.GetClusterInfo().FillBE(instance)
5473 if instance.disk_template not in constants.DTS_NET_MIRROR:
5474 raise errors.OpPrereqError("Instance's disk layout is not"
5475 " network mirrored, cannot failover.",
5478 secondary_nodes = instance.secondary_nodes
5479 if not secondary_nodes:
5480 raise errors.ProgrammerError("no secondary node but using "
5481 "a mirrored disk template")
5483 target_node = secondary_nodes[0]
5484 _CheckNodeOnline(self, target_node)
5485 _CheckNodeNotDrained(self, target_node)
5486 if instance.admin_up:
5487 # check memory requirements on the secondary node
5488 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5489 instance.name, bep[constants.BE_MEMORY],
5490 instance.hypervisor)
5492 self.LogInfo("Not checking memory on the secondary node as"
5493 " instance will not be started")
5495 # check bridge existence
5496 _CheckInstanceBridgesExist(self, instance, node=target_node)
5498 def Exec(self, feedback_fn):
5499 """Failover an instance.
5501 The failover is done by shutting it down on its present node and
5502 starting it on the secondary.
5505 instance = self.instance
5507 source_node = instance.primary_node
5508 target_node = instance.secondary_nodes[0]
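# for a mirrored (DRBD) instance, failover means the current secondary
# becomes the new primary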
5510 if instance.admin_up:
5511 feedback_fn("* checking disk consistency between source and target")
5512 for dev in instance.disks:
5513 # for drbd, these are drbd over lvm
5514 if not _CheckDiskConsistency(self, dev, target_node, False):
5515 if not self.op.ignore_consistency:
5516 raise errors.OpExecError("Disk %s is degraded on target node,"
5517 " aborting failover." % dev.iv_name)
5519 feedback_fn("* not checking disk consistency as instance is not running")
5521 feedback_fn("* shutting down instance on source node")
5522 logging.info("Shutting down instance %s on node %s",
5523 instance.name, source_node)
5525 result = self.rpc.call_instance_shutdown(source_node, instance,
5526 self.op.shutdown_timeout)
5527 msg = result.fail_msg
5529 if self.op.ignore_consistency:
5530 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5531 " Proceeding anyway. Please make sure node"
5532 " %s is down. Error details: %s",
5533 instance.name, source_node, source_node, msg)
5535 raise errors.OpExecError("Could not shutdown instance %s on"
5537 (instance.name, source_node, msg))
5539 feedback_fn("* deactivating the instance's disks on source node")
5540 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5541 raise errors.OpExecError("Can't shut down the instance's disks.")
5543 instance.primary_node = target_node
5544 # distribute new instance config to the other nodes
5545 self.cfg.Update(instance, feedback_fn)
5547 # Only start the instance if it's marked as up
5548 if instance.admin_up:
5549 feedback_fn("* activating the instance's disks on target node")
5550 logging.info("Starting instance %s on node %s",
5551 instance.name, target_node)
5553 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5554 ignore_secondaries=True)
5556 _ShutdownInstanceDisks(self, instance)
5557 raise errors.OpExecError("Can't activate the instance's disks")
5559 feedback_fn("* starting the instance on the target node")
5560 result = self.rpc.call_instance_start(target_node, instance, None, None)
5561 msg = result.fail_msg
5563 _ShutdownInstanceDisks(self, instance)
5564 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5565 (instance.name, target_node, msg))
5568 class LUMigrateInstance(LogicalUnit):
5569 """Migrate an instance.
5571 This is migration without shutting the instance down, as opposed to
5572 failover, which requires a shutdown.
5575 HPATH = "instance-migrate"
5576 HTYPE = constants.HTYPE_INSTANCE
5581 ("cleanup", False, _TBool),
5586 def ExpandNames(self):
5587 self._ExpandAndLockInstance()
5589 self.needed_locks[locking.LEVEL_NODE] = []
5590 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5592 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5594 self.tasklets = [self._migrater]
5596 def DeclareLocks(self, level):
5597 if level == locking.LEVEL_NODE:
5598 self._LockInstancesNodes()
5600 def BuildHooksEnv(self):
5603 This runs on master, primary and secondary nodes of the instance.
5606 instance = self._migrater.instance
5607 source_node = instance.primary_node
5608 target_node = instance.secondary_nodes[0]
5609 env = _BuildInstanceHookEnvByObject(self, instance)
5610 env["MIGRATE_LIVE"] = self._migrater.live
5611 env["MIGRATE_CLEANUP"] = self.op.cleanup
5613 "OLD_PRIMARY": source_node,
5614 "OLD_SECONDARY": target_node,
5615 "NEW_PRIMARY": target_node,
5616 "NEW_SECONDARY": source_node,
5618 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5620 nl_post.append(source_node)
5621 return env, nl, nl_post
5624 class LUMoveInstance(LogicalUnit):
5625 """Move an instance by data-copying.
5628 HPATH = "instance-move"
5629 HTYPE = constants.HTYPE_INSTANCE
5632 ("target_node", _NoDefault, _TNonEmptyString),
5637 def ExpandNames(self):
5638 self._ExpandAndLockInstance()
5639 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5640 self.op.target_node = target_node
5641 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5642 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5644 def DeclareLocks(self, level):
5645 if level == locking.LEVEL_NODE:
5646 self._LockInstancesNodes(primary_only=True)
5648 def BuildHooksEnv(self):
5651 This runs on master, primary and secondary nodes of the instance.
5655 "TARGET_NODE": self.op.target_node,
5656 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5658 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5659 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5660 self.op.target_node]
5663 def CheckPrereq(self):
5664 """Check prerequisites.
5666 This checks that the instance is in the cluster.
5669 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5670 assert self.instance is not None, \
5671 "Cannot retrieve locked instance %s" % self.op.instance_name
5673 node = self.cfg.GetNodeInfo(self.op.target_node)
5674 assert node is not None, \
5675 "Cannot retrieve locked node %s" % self.op.target_node
5677 self.target_node = target_node = node.name
5679 if target_node == instance.primary_node:
5680 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5681 (instance.name, target_node),
5684 bep = self.cfg.GetClusterInfo().FillBE(instance)
5686 for idx, dsk in enumerate(instance.disks):
5687 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5688 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5689 " cannot copy" % idx, errors.ECODE_STATE)
5691 _CheckNodeOnline(self, target_node)
5692 _CheckNodeNotDrained(self, target_node)
5694 if instance.admin_up:
5695 # check memory requirements on the target node
5696 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5697 instance.name, bep[constants.BE_MEMORY],
5698 instance.hypervisor)
5700 self.LogInfo("Not checking memory on the secondary node as"
5701 " instance will not be started")
5703 # check bridge existence
5704 _CheckInstanceBridgesExist(self, instance, node=target_node)
5706 def Exec(self, feedback_fn):
5707 """Move an instance.
5709 The move is done by shutting it down on its present node, copying
5710 the data over (slow) and starting it on the new node.
5713 instance = self.instance
5715 source_node = instance.primary_node
5716 target_node = self.target_node
5718 self.LogInfo("Shutting down instance %s on source node %s",
5719 instance.name, source_node)
5721 result = self.rpc.call_instance_shutdown(source_node, instance,
5722 self.op.shutdown_timeout)
5723 msg = result.fail_msg
5725 if self.op.ignore_consistency:
5726 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5727 " Proceeding anyway. Please make sure node"
5728 " %s is down. Error details: %s",
5729 instance.name, source_node, source_node, msg)
5731 raise errors.OpExecError("Could not shutdown instance %s on"
5733 (instance.name, source_node, msg))
5735 # create the target disks
5737 _CreateDisks(self, instance, target_node=target_node)
5738 except errors.OpExecError:
5739 self.LogWarning("Device creation failed, reverting...")
5741 _RemoveDisks(self, instance, target_node=target_node)
5743 self.cfg.ReleaseDRBDMinors(instance.name)
5746 cluster_name = self.cfg.GetClusterInfo().cluster_name
5749 # activate, get path, copy the data over
5750 for idx, disk in enumerate(instance.disks):
5751 self.LogInfo("Copying data for disk %d", idx)
5752 result = self.rpc.call_blockdev_assemble(target_node, disk,
5753 instance.name, True)
5755 self.LogWarning("Can't assemble newly created disk %d: %s",
5756 idx, result.fail_msg)
5757 errs.append(result.fail_msg)
5759 dev_path = result.payload
5760 result = self.rpc.call_blockdev_export(source_node, disk,
5761 target_node, dev_path,
5764 self.LogWarning("Can't copy data over for disk %d: %s",
5765 idx, result.fail_msg)
5766 errs.append(result.fail_msg)
5770 self.LogWarning("Some disks failed to copy, aborting")
5772 _RemoveDisks(self, instance, target_node=target_node)
5774 self.cfg.ReleaseDRBDMinors(instance.name)
5775 raise errors.OpExecError("Errors during disk copy: %s" %
5778 instance.primary_node = target_node
5779 self.cfg.Update(instance, feedback_fn)
5781 self.LogInfo("Removing the disks on the original node")
5782 _RemoveDisks(self, instance, target_node=source_node)
5784 # Only start the instance if it's marked as up
5785 if instance.admin_up:
5786 self.LogInfo("Starting instance %s on node %s",
5787 instance.name, target_node)
5789 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5790 ignore_secondaries=True)
5792 _ShutdownInstanceDisks(self, instance)
5793 raise errors.OpExecError("Can't activate the instance's disks")
5795 result = self.rpc.call_instance_start(target_node, instance, None, None)
5796 msg = result.fail_msg
5798 _ShutdownInstanceDisks(self, instance)
5799 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5800 (instance.name, target_node, msg))
5803 class LUMigrateNode(LogicalUnit):
5804 """Migrate all instances from a node.
5807 HPATH = "node-migrate"
5808 HTYPE = constants.HTYPE_NODE
5816 def ExpandNames(self):
5817 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5819 self.needed_locks = {
5820 locking.LEVEL_NODE: [self.op.node_name],
5823 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5825 # Create one migration tasklet for each instance whose primary node is this node
5829 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5830 logging.debug("Migrating instance %s", inst.name)
5831 names.append(inst.name)
5833 tasklets.append(TLMigrateInstance(self, inst.name, False))
5835 self.tasklets = tasklets
5837 # Declare instance locks
5838 self.needed_locks[locking.LEVEL_INSTANCE] = names
5840 def DeclareLocks(self, level):
5841 if level == locking.LEVEL_NODE:
5842 self._LockInstancesNodes()
5844 def BuildHooksEnv(self):
5847 This runs on the master, the primary and all the secondaries.
5851 "NODE_NAME": self.op.node_name,
5854 nl = [self.cfg.GetMasterNode()]
5856 return (env, nl, nl)
5859 class TLMigrateInstance(Tasklet):
5860 """Tasklet class for instance migration.
5863 @ivar live: whether the migration will be done live or non-live;
5864 this variable is initialized only after CheckPrereq has run
5867 def __init__(self, lu, instance_name, cleanup):
5868 """Initializes this class.
5871 Tasklet.__init__(self, lu)
5874 self.instance_name = instance_name
5875 self.cleanup = cleanup
5876 self.live = False # will be overridden later
5878 def CheckPrereq(self):
5879 """Check prerequisites.
5881 This checks that the instance is in the cluster.
5884 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5885 instance = self.cfg.GetInstanceInfo(instance_name)
5886 assert instance is not None
5888 if instance.disk_template != constants.DT_DRBD8:
5889 raise errors.OpPrereqError("Instance's disk layout is not"
5890 " drbd8, cannot migrate.", errors.ECODE_STATE)
5892 secondary_nodes = instance.secondary_nodes
5893 if not secondary_nodes:
5894 raise errors.ConfigurationError("No secondary node but using"
5895 " drbd8 disk template")
5897 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5899 target_node = secondary_nodes[0]
5900 # check memory requirements on the secondary node
5901 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5902 instance.name, i_be[constants.BE_MEMORY],
5903 instance.hypervisor)
5905 # check bridge existence
5906 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5908 if not self.cleanup:
5909 _CheckNodeNotDrained(self.lu, target_node)
5910 result = self.rpc.call_instance_migratable(instance.primary_node,
5912 result.Raise("Can't migrate, please use failover",
5913 prereq=True, ecode=errors.ECODE_STATE)
5915 self.instance = instance
5917 if self.lu.op.live is not None and self.lu.op.mode is not None:
5918 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5919 " parameters is accepted",
5921 if self.lu.op.live is not None:
5923 self.lu.op.mode = constants.HT_MIGRATION_LIVE
5925 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5926 # reset the 'live' parameter to None so that repeated
5927 # invocations of CheckPrereq do not raise an exception
5928 self.lu.op.live = None
5929 elif self.lu.op.mode is None:
5930 # read the default value from the hypervisor
5931 i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5932 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5934 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
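# Summary of the live/mode resolution above (editorial note, not part of the
# original source):
#   op.live is True,  op.mode is None -> mode = HT_MIGRATION_LIVE
#   op.live is False, op.mode is None -> mode = HT_MIGRATION_NONLIVE
#   op.live is None,  op.mode is None -> mode taken from the hypervisor's
#                                        HV_MIGRATION_MODE default
#   op.live and op.mode both given    -> OpPrereqError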
5936 def _WaitUntilSync(self):
5937 """Poll with custom rpc for disk sync.
5939 This uses our own step-based rpc call.
5942 self.feedback_fn("* wait until resync is done")
5946 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5948 self.instance.disks)
5950 for node, nres in result.items():
5951 nres.Raise("Cannot resync disks on node %s" % node)
5952 node_done, node_percent = nres.payload
5953 all_done = all_done and node_done
5954 if node_percent is not None:
5955 min_percent = min(min_percent, node_percent)
5957 if min_percent < 100:
5958 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5961 def _EnsureSecondary(self, node):
5962 """Demote a node to secondary.
5965 self.feedback_fn("* switching node %s to secondary mode" % node)
5967 for dev in self.instance.disks:
5968 self.cfg.SetDiskID(dev, node)
5970 result = self.rpc.call_blockdev_close(node, self.instance.name,
5971 self.instance.disks)
5972 result.Raise("Cannot change disk to secondary on node %s" % node)
5974 def _GoStandalone(self):
5975 """Disconnect from the network.
5978 self.feedback_fn("* changing into standalone mode")
5979 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5980 self.instance.disks)
5981 for node, nres in result.items():
5982 nres.Raise("Cannot disconnect disks on node %s" % node)
5984 def _GoReconnect(self, multimaster):
5985 """Reconnect to the network.
5991 msg = "single-master"
5992 self.feedback_fn("* changing disks into %s mode" % msg)
5993 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5994 self.instance.disks,
5995 self.instance.name, multimaster)
5996 for node, nres in result.items():
5997 nres.Raise("Cannot change disks config on node %s" % node)
5999 def _ExecCleanup(self):
6000 """Try to clean up after a failed migration.
6002 The cleanup is done by:
6003 - check that the instance is running only on one node
6004 (and update the config if needed)
6005 - change disks on its secondary node to secondary
6006 - wait until disks are fully synchronized
6007 - disconnect from the network
6008 - change disks into single-master mode
6009 - wait again until disks are fully synchronized
6012 instance = self.instance
6013 target_node = self.target_node
6014 source_node = self.source_node
6016 # check running on only one node
6017 self.feedback_fn("* checking where the instance actually runs"
6018 " (if this hangs, the hypervisor might be in"
6020 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6021 for node, result in ins_l.items():
6022 result.Raise("Can't contact node %s" % node)
6024 runningon_source = instance.name in ins_l[source_node].payload
6025 runningon_target = instance.name in ins_l[target_node].payload
6027 if runningon_source and runningon_target:
6028 raise errors.OpExecError("Instance seems to be running on two nodes,"
6029 " or the hypervisor is confused. You will have"
6030 " to ensure manually that it runs only on one"
6031 " and restart this operation.")
6033 if not (runningon_source or runningon_target):
6034 raise errors.OpExecError("Instance does not seem to be running at all."
6035 " In this case, it's safer to repair by"
6036 " running 'gnt-instance stop' to ensure disk"
6037 " shutdown, and then restarting it.")
6039 if runningon_target:
6040 # the migration has actually succeeded, we need to update the config
6041 self.feedback_fn("* instance running on secondary node (%s),"
6042 " updating config" % target_node)
6043 instance.primary_node = target_node
6044 self.cfg.Update(instance, self.feedback_fn)
6045 demoted_node = source_node
6047 self.feedback_fn("* instance confirmed to be running on its"
6048 " primary node (%s)" % source_node)
6049 demoted_node = target_node
6051 self._EnsureSecondary(demoted_node)
6053 self._WaitUntilSync()
6054 except errors.OpExecError:
6055 # we ignore errors here, since if the device is standalone, it
6056 # won't be able to sync
6058 self._GoStandalone()
6059 self._GoReconnect(False)
6060 self._WaitUntilSync()
6062 self.feedback_fn("* done")
6064 def _RevertDiskStatus(self):
6065 """Try to revert the disk status after a failed migration.
6068 target_node = self.target_node
6070 self._EnsureSecondary(target_node)
6071 self._GoStandalone()
6072 self._GoReconnect(False)
6073 self._WaitUntilSync()
6074 except errors.OpExecError, err:
6075 self.lu.LogWarning("Migration failed and I can't reconnect the"
6076 " drives: error '%s'\n"
6077 "Please look and recover the instance status" %
6080 def _AbortMigration(self):
6081 """Call the hypervisor code to abort a started migration.
6084 instance = self.instance
6085 target_node = self.target_node
6086 migration_info = self.migration_info
6088 abort_result = self.rpc.call_finalize_migration(target_node,
6092 abort_msg = abort_result.fail_msg
6094 logging.error("Aborting migration failed on target node %s: %s",
6095 target_node, abort_msg)
6096 # Don't raise an exception here, as we still have to try to revert the
6097 # disk status, even if this step failed.
6099 def _ExecMigration(self):
6100 """Migrate an instance.
6102 The migration is done by:
6103 - change the disks into dual-master mode
6104 - wait until disks are fully synchronized again
6105 - migrate the instance
6106 - change disks on the new secondary node (the old primary) to secondary
6107 - wait until disks are fully synchronized
6108 - change disks into single-master mode
6111 instance = self.instance
6112 target_node = self.target_node
6113 source_node = self.source_node
6115 self.feedback_fn("* checking disk consistency between source and target")
6116 for dev in instance.disks:
6117 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6118 raise errors.OpExecError("Disk %s is degraded or not fully"
6119 " synchronized on target node,"
6120 " aborting migration." % dev.iv_name)
6122 # First get the migration information from the remote node
6123 result = self.rpc.call_migration_info(source_node, instance)
6124 msg = result.fail_msg
6126 log_err = ("Failed fetching source migration information from %s: %s" %
6128 logging.error(log_err)
6129 raise errors.OpExecError(log_err)
6131 self.migration_info = migration_info = result.payload
6133 # Then switch the disks to master/master mode
6134 self._EnsureSecondary(target_node)
6135 self._GoStandalone()
6136 self._GoReconnect(True)
6137 self._WaitUntilSync()
6139 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6140 result = self.rpc.call_accept_instance(target_node,
6143 self.nodes_ip[target_node])
6145 msg = result.fail_msg
6147 logging.error("Instance pre-migration failed, trying to revert"
6148 " disk status: %s", msg)
6149 self.feedback_fn("Pre-migration failed, aborting")
6150 self._AbortMigration()
6151 self._RevertDiskStatus()
6152 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6153 (instance.name, msg))
6155 self.feedback_fn("* migrating instance to %s" % target_node)
6157 result = self.rpc.call_instance_migrate(source_node, instance,
6158 self.nodes_ip[target_node],
6160 msg = result.fail_msg
6162 logging.error("Instance migration failed, trying to revert"
6163 " disk status: %s", msg)
6164 self.feedback_fn("Migration failed, aborting")
6165 self._AbortMigration()
6166 self._RevertDiskStatus()
6167 raise errors.OpExecError("Could not migrate instance %s: %s" %
6168 (instance.name, msg))
6171 instance.primary_node = target_node
6172 # distribute new instance config to the other nodes
6173 self.cfg.Update(instance, self.feedback_fn)
6175 result = self.rpc.call_finalize_migration(target_node,
6179 msg = result.fail_msg
6181 logging.error("Instance migration succeeded, but finalization failed:"
6183 raise errors.OpExecError("Could not finalize instance migration: %s" %
6186 self._EnsureSecondary(source_node)
6187 self._WaitUntilSync()
6188 self._GoStandalone()
6189 self._GoReconnect(False)
6190 self._WaitUntilSync()
6192 self.feedback_fn("* done")
6194 def Exec(self, feedback_fn):
6195 """Perform the migration.
6198 feedback_fn("Migrating instance %s" % self.instance.name)
6200 self.feedback_fn = feedback_fn
6202 self.source_node = self.instance.primary_node
6203 self.target_node = self.instance.secondary_nodes[0]
6204 self.all_nodes = [self.source_node, self.target_node]
6206 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6207 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6211 return self._ExecCleanup()
6213 return self._ExecMigration()
6216 def _CreateBlockDev(lu, node, instance, device, force_create,
6218 """Create a tree of block devices on a given node.
6220 If this device type has to be created on secondaries, create it and
6223 If not, just recurse into its children, keeping the same 'force' value.
6225 @param lu: the lu on whose behalf we execute
6226 @param node: the node on which to create the device
6227 @type instance: L{objects.Instance}
6228 @param instance: the instance which owns the device
6229 @type device: L{objects.Disk}
6230 @param device: the device to create
6231 @type force_create: boolean
6232 @param force_create: whether to force creation of this device; this
6233 will be changed to True whenever we find a device which has the
6234 CreateOnSecondary() attribute set
6235 @param info: the extra 'metadata' we should attach to the device
6236 (this will be represented as an LVM tag)
6237 @type force_open: boolean
6238 @type force_open: boolean
6239 @param force_open: this parameter will be passed to the
6240 L{backend.BlockdevCreate} function where it specifies
6241 whether we run on primary or not, and it affects both
6242 the child assembly and the device's own Open() execution
6244 if device.CreateOnSecondary():
6248 for child in device.children:
6249 _CreateBlockDev(lu, node, instance, child, force_create,
6252 if not force_create:
6255 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
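# Illustrative usage sketch (not part of the original source); it mirrors the
# per-node loop in _CreateDisks() further below, where force_create and
# force_open are True only on the primary node:
#
#   for node in instance.all_nodes:
#     f_create = (node == instance.primary_node)
#     _CreateBlockDev(lu, node, instance, disk, f_create, info, f_create)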
6258 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6259 """Create a single block device on a given node.
6261 This will not recurse over children of the device, so they must be
6264 @param lu: the lu on whose behalf we execute
6265 @param node: the node on which to create the device
6266 @type instance: L{objects.Instance}
6267 @param instance: the instance which owns the device
6268 @type device: L{objects.Disk}
6269 @param device: the device to create
6270 @param info: the extra 'metadata' we should attach to the device
6271 (this will be represented as an LVM tag)
6272 @type force_open: boolean
6273 @type force_open: boolean
6274 @param force_open: this parameter will be passed to the
6275 L{backend.BlockdevCreate} function where it specifies
6276 whether we run on primary or not, and it affects both
6277 the child assembly and the device's own Open() execution
6279 lu.cfg.SetDiskID(device, node)
6280 result = lu.rpc.call_blockdev_create(node, device, device.size,
6281 instance.name, force_open, info)
6282 result.Raise("Can't create block device %s on"
6283 " node %s for instance %s" % (device, node, instance.name))
6284 if device.physical_id is None:
6285 device.physical_id = result.payload
6288 def _GenerateUniqueNames(lu, exts):
6289 """Generate suitable LV names.
6291 This will generate logical volume names for the given instance.
6296 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6297 results.append("%s%s" % (new_id, val))
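# Illustrative example (not part of the original source): a call such as
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns names of the form ["<uuid0>.disk0", "<uuid1>.disk1"], each element
# prefixed with its own ID from cfg.GenerateUniqueID().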
6301 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6303 """Generate a drbd8 device complete with its children.
6306 port = lu.cfg.AllocatePort()
6307 vgname = lu.cfg.GetVGName()
6308 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6309 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6310 logical_id=(vgname, names[0]))
6311 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6312 logical_id=(vgname, names[1]))
6313 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6314 logical_id=(primary, secondary, port,
6317 children=[dev_data, dev_meta],
6322 def _GenerateDiskTemplate(lu, template_name,
6323 instance_name, primary_node,
6324 secondary_nodes, disk_info,
6325 file_storage_dir, file_driver,
6327 """Generate the entire disk layout for a given template type.
6330 #TODO: compute space requirements
6332 vgname = lu.cfg.GetVGName()
6333 disk_count = len(disk_info)
6335 if template_name == constants.DT_DISKLESS:
6337 elif template_name == constants.DT_PLAIN:
6338 if len(secondary_nodes) != 0:
6339 raise errors.ProgrammerError("Wrong template configuration")
6341 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6342 for i in range(disk_count)])
6343 for idx, disk in enumerate(disk_info):
6344 disk_index = idx + base_index
6345 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6346 logical_id=(vgname, names[idx]),
6347 iv_name="disk/%d" % disk_index,
6349 disks.append(disk_dev)
6350 elif template_name == constants.DT_DRBD8:
6351 if len(secondary_nodes) != 1:
6352 raise errors.ProgrammerError("Wrong template configuration")
6353 remote_node = secondary_nodes[0]
6354 minors = lu.cfg.AllocateDRBDMinor(
6355 [primary_node, remote_node] * len(disk_info), instance_name)
6358 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6359 for i in range(disk_count)]):
6360 names.append(lv_prefix + "_data")
6361 names.append(lv_prefix + "_meta")
6362 for idx, disk in enumerate(disk_info):
6363 disk_index = idx + base_index
6364 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6365 disk["size"], names[idx*2:idx*2+2],
6366 "disk/%d" % disk_index,
6367 minors[idx*2], minors[idx*2+1])
6368 disk_dev.mode = disk["mode"]
6369 disks.append(disk_dev)
6370 elif template_name == constants.DT_FILE:
6371 if len(secondary_nodes) != 0:
6372 raise errors.ProgrammerError("Wrong template configuration")
6374 _RequireFileStorage()
6376 for idx, disk in enumerate(disk_info):
6377 disk_index = idx + base_index
6378 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6379 iv_name="disk/%d" % disk_index,
6380 logical_id=(file_driver,
6381 "%s/disk%d" % (file_storage_dir,
6384 disks.append(disk_dev)
6386 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
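# Illustrative example (not part of the original source): for
# template_name=constants.DT_PLAIN, disk_info=[{"size": 512,
# "mode": constants.DISK_RDWR}] and base_index=0, the result is a single
# LD_LV Disk of 512 MB with logical_id=(<vgname>, "<uuid>.disk0") and
# iv_name "disk/0"; under DT_DRBD8 the same entry instead becomes a DRBD8
# device backed by a data LV and a 128 MB metadata LV on both nodes.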
6390 def _GetInstanceInfoText(instance):
6391 """Compute the text that should be added to the disk's metadata.
6394 return "originstname+%s" % instance.name
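# For example (name purely illustrative), an instance called
# "web1.example.com" yields "originstname+web1.example.com", which
# _CreateDisks() below attaches to the new volumes as an LVM tag.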
6397 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6398 """Create all disks for an instance.
6400 This abstracts away some work from AddInstance.
6402 @type lu: L{LogicalUnit}
6403 @param lu: the logical unit on whose behalf we execute
6404 @type instance: L{objects.Instance}
6405 @param instance: the instance whose disks we should create
6407 @param to_skip: list of indices to skip
6408 @type target_node: string
6409 @param target_node: if passed, overrides the target node for creation
6411 @return: the success of the creation
6414 info = _GetInstanceInfoText(instance)
6415 if target_node is None:
6416 pnode = instance.primary_node
6417 all_nodes = instance.all_nodes
6422 if instance.disk_template == constants.DT_FILE:
6423 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6424 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6426 result.Raise("Failed to create directory '%s' on"
6427 " node %s" % (file_storage_dir, pnode))
6429 # Note: this needs to be kept in sync with adding of disks in
6430 # LUSetInstanceParams
6431 for idx, device in enumerate(instance.disks):
6432 if to_skip and idx in to_skip:
6434 logging.info("Creating volume %s for instance %s",
6435 device.iv_name, instance.name)
6437 for node in all_nodes:
6438 f_create = node == pnode
6439 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6442 def _RemoveDisks(lu, instance, target_node=None):
6443 """Remove all disks for an instance.
6445 This abstracts away some work from `AddInstance()` and
6446 `RemoveInstance()`. Note that in case some of the devices couldn't
6447 be removed, the removal will continue with the other ones (compare
6448 with `_CreateDisks()`).
6450 @type lu: L{LogicalUnit}
6451 @param lu: the logical unit on whose behalf we execute
6452 @type instance: L{objects.Instance}
6453 @param instance: the instance whose disks we should remove
6454 @type target_node: string
6455 @param target_node: used to override the node on which to remove the disks
6457 @return: the success of the removal
6460 logging.info("Removing block devices for instance %s", instance.name)
6463 for device in instance.disks:
6465 edata = [(target_node, device)]
6467 edata = device.ComputeNodeTree(instance.primary_node)
6468 for node, disk in edata:
6469 lu.cfg.SetDiskID(disk, node)
6470 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6472 lu.LogWarning("Could not remove block device %s on node %s,"
6473 " continuing anyway: %s", device.iv_name, node, msg)
6476 if instance.disk_template == constants.DT_FILE:
6477 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6481 tgt = instance.primary_node
6482 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6484 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6485 file_storage_dir, instance.primary_node, result.fail_msg)
6491 def _ComputeDiskSize(disk_template, disks):
6492 """Compute disk size requirements in the volume group
6495 # Required free disk space as a function of disk and swap space
6497 constants.DT_DISKLESS: None,
6498 constants.DT_PLAIN: sum(d["size"] for d in disks),
6499 # 128 MB are added for drbd metadata for each disk
6500 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6501 constants.DT_FILE: None,
6504 if disk_template not in req_size_dict:
6505 raise errors.ProgrammerError("Disk template '%s' size requirement"
6506 " is unknown" % disk_template)
6508 return req_size_dict[disk_template]
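# Worked example (illustrative, not part of the original source): for two
# disks of 1024 MB and 2048 MB, DT_PLAIN needs 1024 + 2048 = 3072 MB in the
# volume group, DT_DRBD8 adds 128 MB of DRBD metadata per disk, i.e.
# (1024 + 128) + (2048 + 128) = 3328 MB, and DT_DISKLESS/DT_FILE have no
# volume group requirement (None).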
6511 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6512 """Hypervisor parameter validation.
6514 This function abstracts the hypervisor parameter validation to be
6515 used in both instance create and instance modify.
6517 @type lu: L{LogicalUnit}
6518 @param lu: the logical unit for which we check
6519 @type nodenames: list
6520 @param nodenames: the list of nodes on which we should check
6521 @type hvname: string
6522 @param hvname: the name of the hypervisor we should use
6523 @type hvparams: dict
6524 @param hvparams: the parameters which we need to check
6525 @raise errors.OpPrereqError: if the parameters are not valid
6528 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6531 for node in nodenames:
6535 info.Raise("Hypervisor parameter validation failed on node %s" % node)
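# Typical usage (illustrative) as done later in LUCreateInstance.CheckPrereq:
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)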
6538 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6539 """OS parameters validation.
6541 @type lu: L{LogicalUnit}
6542 @param lu: the logical unit for which we check
6543 @type required: boolean
6544 @param required: whether the validation should fail if the OS is not
6546 @type nodenames: list
6547 @param nodenames: the list of nodes on which we should check
6548 @type osname: string
6549 @param osname: the name of the OS we should use
6550 @type osparams: dict
6551 @param osparams: the parameters which we need to check
6552 @raise errors.OpPrereqError: if the parameters are not valid
6555 result = lu.rpc.call_os_validate(required, nodenames, osname,
6556 [constants.OS_VALIDATE_PARAMETERS],
6558 for node, nres in result.items():
6559 # we don't check for offline cases since this should be run only
6560 # against the master node and/or an instance's nodes
6561 nres.Raise("OS Parameters validation failed on node %s" % node)
6562 if not nres.payload:
6563 lu.LogInfo("OS %s not found on node %s, validation skipped",
6567 class LUCreateInstance(LogicalUnit):
6568 """Create an instance.
6571 HPATH = "instance-add"
6572 HTYPE = constants.HTYPE_INSTANCE
6575 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6576 ("start", True, _TBool),
6577 ("wait_for_sync", True, _TBool),
6578 ("ip_check", True, _TBool),
6579 ("name_check", True, _TBool),
6580 ("disks", _NoDefault, _TListOf(_TDict)),
6581 ("nics", _NoDefault, _TListOf(_TDict)),
6582 ("hvparams", _EmptyDict, _TDict),
6583 ("beparams", _EmptyDict, _TDict),
6584 ("osparams", _EmptyDict, _TDict),
6585 ("no_install", None, _TMaybeBool),
6586 ("os_type", None, _TMaybeString),
6587 ("force_variant", False, _TBool),
6588 ("source_handshake", None, _TOr(_TList, _TNone)),
6589 ("source_x509_ca", None, _TMaybeString),
6590 ("source_instance_name", None, _TMaybeString),
6591 ("src_node", None, _TMaybeString),
6592 ("src_path", None, _TMaybeString),
6593 ("pnode", None, _TMaybeString),
6594 ("snode", None, _TMaybeString),
6595 ("iallocator", None, _TMaybeString),
6596 ("hypervisor", None, _TMaybeString),
6597 ("disk_template", _NoDefault, _CheckDiskTemplate),
6598 ("identify_defaults", False, _TBool),
6599 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6600 ("file_storage_dir", None, _TMaybeString),
6604 def CheckArguments(self):
6608 # do not require name_check to ease forward/backward compatibility
6610 if self.op.no_install and self.op.start:
6611 self.LogInfo("No-installation mode selected, disabling startup")
6612 self.op.start = False
6613 # validate/normalize the instance name
6614 self.op.instance_name = \
6615 netutils.HostInfo.NormalizeName(self.op.instance_name)
6617 if self.op.ip_check and not self.op.name_check:
6618 # TODO: make the ip check more flexible and not depend on the name check
6619 raise errors.OpPrereqError("Cannot do ip check without a name check",
6622 # check nics' parameter names
6623 for nic in self.op.nics:
6624 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6626 # check disks. parameter names and consistent adopt/no-adopt strategy
6627 has_adopt = has_no_adopt = False
6628 for disk in self.op.disks:
6629 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6634 if has_adopt and has_no_adopt:
6635 raise errors.OpPrereqError("Either all disks are adopted or none is",
6638 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6639 raise errors.OpPrereqError("Disk adoption is not supported for the"
6640 " '%s' disk template" %
6641 self.op.disk_template,
6643 if self.op.iallocator is not None:
6644 raise errors.OpPrereqError("Disk adoption not allowed with an"
6645 " iallocator script", errors.ECODE_INVAL)
6646 if self.op.mode == constants.INSTANCE_IMPORT:
6647 raise errors.OpPrereqError("Disk adoption not allowed for"
6648 " instance import", errors.ECODE_INVAL)
6650 self.adopt_disks = has_adopt
6652 # instance name verification
6653 if self.op.name_check:
6654 self.hostname1 = netutils.GetHostInfo(self.op.instance_name)
6655 self.op.instance_name = self.hostname1.name
6656 # used in CheckPrereq for ip ping check
6657 self.check_ip = self.hostname1.ip
6658 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6659 raise errors.OpPrereqError("Remote imports require names to be checked",
6662 self.check_ip = None
6664 # file storage checks
6665 if (self.op.file_driver and
6666 not self.op.file_driver in constants.FILE_DRIVER):
6667 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6668 self.op.file_driver, errors.ECODE_INVAL)
6670 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6671 raise errors.OpPrereqError("File storage directory path not absolute",
6674 ### Node/iallocator related checks
6675 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6677 if self.op.pnode is not None:
6678 if self.op.disk_template in constants.DTS_NET_MIRROR:
6679 if self.op.snode is None:
6680 raise errors.OpPrereqError("The networked disk templates need"
6681 " a mirror node", errors.ECODE_INVAL)
6683 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6685 self.op.snode = None
6687 self._cds = _GetClusterDomainSecret()
6689 if self.op.mode == constants.INSTANCE_IMPORT:
6690 # On import force_variant must be True, because if we forced it at
6691 # initial install, our only chance when importing it back is that it
6693 self.op.force_variant = True
6695 if self.op.no_install:
6696 self.LogInfo("No-installation mode has no effect during import")
6698 elif self.op.mode == constants.INSTANCE_CREATE:
6699 if self.op.os_type is None:
6700 raise errors.OpPrereqError("No guest OS specified",
6702 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_oss:
6703 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6704 " installation" % self.op.os_type,
6706 if self.op.disk_template is None:
6707 raise errors.OpPrereqError("No disk template specified",
6710 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6711 # Check handshake to ensure both clusters have the same domain secret
6712 src_handshake = self.op.source_handshake
6713 if not src_handshake:
6714 raise errors.OpPrereqError("Missing source handshake",
6717 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6720 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6723 # Load and check source CA
6724 self.source_x509_ca_pem = self.op.source_x509_ca
6725 if not self.source_x509_ca_pem:
6726 raise errors.OpPrereqError("Missing source X509 CA",
6730 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6732 except OpenSSL.crypto.Error, err:
6733 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6734 (err, ), errors.ECODE_INVAL)
6736 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6737 if errcode is not None:
6738 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6741 self.source_x509_ca = cert
6743 src_instance_name = self.op.source_instance_name
6744 if not src_instance_name:
6745 raise errors.OpPrereqError("Missing source instance name",
6748 norm_name = netutils.HostInfo.NormalizeName(src_instance_name)
6749 self.source_instance_name = netutils.GetHostInfo(norm_name).name
6752 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6753 self.op.mode, errors.ECODE_INVAL)
6755 def ExpandNames(self):
6756 """ExpandNames for CreateInstance.
6758 Figure out the right locks for instance creation.
6761 self.needed_locks = {}
6763 instance_name = self.op.instance_name
6764 # this is just a preventive check, but someone might still add this
6765 # instance in the meantime, and creation will fail at lock-add time
6766 if instance_name in self.cfg.GetInstanceList():
6767 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6768 instance_name, errors.ECODE_EXISTS)
6770 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6772 if self.op.iallocator:
6773 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6775 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6776 nodelist = [self.op.pnode]
6777 if self.op.snode is not None:
6778 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6779 nodelist.append(self.op.snode)
6780 self.needed_locks[locking.LEVEL_NODE] = nodelist
6782 # in case of import lock the source node too
6783 if self.op.mode == constants.INSTANCE_IMPORT:
6784 src_node = self.op.src_node
6785 src_path = self.op.src_path
6787 if src_path is None:
6788 self.op.src_path = src_path = self.op.instance_name
6790 if src_node is None:
6791 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6792 self.op.src_node = None
6793 if os.path.isabs(src_path):
6794 raise errors.OpPrereqError("Importing an instance from an absolute"
6795 " path requires a source node option.",
6798 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6799 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6800 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6801 if not os.path.isabs(src_path):
6802 self.op.src_path = src_path = \
6803 utils.PathJoin(constants.EXPORT_DIR, src_path)
6805 def _RunAllocator(self):
6806 """Run the allocator based on input opcode.
6809 nics = [n.ToDict() for n in self.nics]
6810 ial = IAllocator(self.cfg, self.rpc,
6811 mode=constants.IALLOCATOR_MODE_ALLOC,
6812 name=self.op.instance_name,
6813 disk_template=self.op.disk_template,
6816 vcpus=self.be_full[constants.BE_VCPUS],
6817 mem_size=self.be_full[constants.BE_MEMORY],
6820 hypervisor=self.op.hypervisor,
6823 ial.Run(self.op.iallocator)
6826 raise errors.OpPrereqError("Can't compute nodes using"
6827 " iallocator '%s': %s" %
6828 (self.op.iallocator, ial.info),
6830 if len(ial.result) != ial.required_nodes:
6831 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6832 " of nodes (%s), required %s" %
6833 (self.op.iallocator, len(ial.result),
6834 ial.required_nodes), errors.ECODE_FAULT)
6835 self.op.pnode = ial.result[0]
6836 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6837 self.op.instance_name, self.op.iallocator,
6838 utils.CommaJoin(ial.result))
6839 if ial.required_nodes == 2:
6840 self.op.snode = ial.result[1]
6842 def BuildHooksEnv(self):
6845 This runs on master, primary and secondary nodes of the instance.
6849 "ADD_MODE": self.op.mode,
6851 if self.op.mode == constants.INSTANCE_IMPORT:
6852 env["SRC_NODE"] = self.op.src_node
6853 env["SRC_PATH"] = self.op.src_path
6854 env["SRC_IMAGES"] = self.src_images
6856 env.update(_BuildInstanceHookEnv(
6857 name=self.op.instance_name,
6858 primary_node=self.op.pnode,
6859 secondary_nodes=self.secondaries,
6860 status=self.op.start,
6861 os_type=self.op.os_type,
6862 memory=self.be_full[constants.BE_MEMORY],
6863 vcpus=self.be_full[constants.BE_VCPUS],
6864 nics=_NICListToTuple(self, self.nics),
6865 disk_template=self.op.disk_template,
6866 disks=[(d["size"], d["mode"]) for d in self.disks],
6869 hypervisor_name=self.op.hypervisor,
6872 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6876 def _ReadExportInfo(self):
6877 """Reads the export information from disk.
6879 It will override the opcode source node and path with the actual
6880 information, if these two were not specified before.
6882 @return: the export information
6885 assert self.op.mode == constants.INSTANCE_IMPORT
6887 src_node = self.op.src_node
6888 src_path = self.op.src_path
6890 if src_node is None:
6891 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6892 exp_list = self.rpc.call_export_list(locked_nodes)
6894 for node in exp_list:
6895 if exp_list[node].fail_msg:
6897 if src_path in exp_list[node].payload:
6899 self.op.src_node = src_node = node
6900 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6904 raise errors.OpPrereqError("No export found for relative path %s" %
6905 src_path, errors.ECODE_INVAL)
6907 _CheckNodeOnline(self, src_node)
6908 result = self.rpc.call_export_info(src_node, src_path)
6909 result.Raise("No export or invalid export found in dir %s" % src_path)
6911 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6912 if not export_info.has_section(constants.INISECT_EXP):
6913 raise errors.ProgrammerError("Corrupted export config",
6914 errors.ECODE_ENVIRON)
6916 ei_version = export_info.get(constants.INISECT_EXP, "version")
6917 if (int(ei_version) != constants.EXPORT_VERSION):
6918 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6919 (ei_version, constants.EXPORT_VERSION),
6920 errors.ECODE_ENVIRON)
6923 def _ReadExportParams(self, einfo):
6924 """Use export parameters as defaults.
6926 In case the opcode doesn't specify (i.e. override) some instance
6927 parameters, try to use them from the export information, if
6931 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6933 if self.op.disk_template is None:
6934 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6935 self.op.disk_template = einfo.get(constants.INISECT_INS,
6938 raise errors.OpPrereqError("No disk template specified and the export"
6939 " is missing the disk_template information",
6942 if not self.op.disks:
6943 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6945 # TODO: import the disk iv_name too
6946 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6947 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6948 disks.append({"size": disk_sz})
6949 self.op.disks = disks
6951 raise errors.OpPrereqError("No disk info specified and the export"
6952 " is missing the disk information",
6955 if (not self.op.nics and
6956 einfo.has_option(constants.INISECT_INS, "nic_count")):
6958 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6960 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6961 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6966 if (self.op.hypervisor is None and
6967 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6968 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6969 if einfo.has_section(constants.INISECT_HYP):
6970 # use the export parameters but do not override the ones
6971 # specified by the user
6972 for name, value in einfo.items(constants.INISECT_HYP):
6973 if name not in self.op.hvparams:
6974 self.op.hvparams[name] = value
6976 if einfo.has_section(constants.INISECT_BEP):
6977 # use the parameters, without overriding
6978 for name, value in einfo.items(constants.INISECT_BEP):
6979 if name not in self.op.beparams:
6980 self.op.beparams[name] = value
6982 # try to read the parameters old style, from the main section
6983 for name in constants.BES_PARAMETERS:
6984 if (name not in self.op.beparams and
6985 einfo.has_option(constants.INISECT_INS, name)):
6986 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6988 if einfo.has_section(constants.INISECT_OSP):
6989 # use the parameters, without overriding
6990 for name, value in einfo.items(constants.INISECT_OSP):
6991 if name not in self.op.osparams:
6992 self.op.osparams[name] = value
6994 def _RevertToDefaults(self, cluster):
6995 """Revert the instance parameters to the default values.
6999 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7000 for name in self.op.hvparams.keys():
7001 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7002 del self.op.hvparams[name]
7004 be_defs = cluster.SimpleFillBE({})
7005 for name in self.op.beparams.keys():
7006 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7007 del self.op.beparams[name]
7009 nic_defs = cluster.SimpleFillNIC({})
7010 for nic in self.op.nics:
7011 for name in constants.NICS_PARAMETERS:
7012 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7015 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7016 for name in self.op.osparams.keys():
7017 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7018 del self.op.osparams[name]
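# Illustrative effect (editorial note, not part of the original source): if
# the opcode passes a parameter whose value equals the cluster-wide default,
# the entry is dropped here so the instance does not carry a redundant
# override and keeps following the cluster default if it later changes.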
7020 def CheckPrereq(self):
7021 """Check prerequisites.
7024 if self.op.mode == constants.INSTANCE_IMPORT:
7025 export_info = self._ReadExportInfo()
7026 self._ReadExportParams(export_info)
7028 _CheckDiskTemplate(self.op.disk_template)
7030 if (not self.cfg.GetVGName() and
7031 self.op.disk_template not in constants.DTS_NOT_LVM):
7032 raise errors.OpPrereqError("Cluster does not support lvm-based"
7033 " instances", errors.ECODE_STATE)
7035 if self.op.hypervisor is None:
7036 self.op.hypervisor = self.cfg.GetHypervisorType()
7038 cluster = self.cfg.GetClusterInfo()
7039 enabled_hvs = cluster.enabled_hypervisors
7040 if self.op.hypervisor not in enabled_hvs:
7041 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7042 " cluster (%s)" % (self.op.hypervisor,
7043 ",".join(enabled_hvs)),
7046 # check hypervisor parameter syntax (locally)
7047 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7048 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7050 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7051 hv_type.CheckParameterSyntax(filled_hvp)
7052 self.hv_full = filled_hvp
7053 # check that we don't specify global parameters on an instance
7054 _CheckGlobalHvParams(self.op.hvparams)
7056 # fill and remember the beparams dict
7057 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7058 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7060 # build os parameters
7061 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7063 # now that hvp/bep are in final format, let's reset to defaults,
7065 if self.op.identify_defaults:
7066 self._RevertToDefaults(cluster)
7070 for idx, nic in enumerate(self.op.nics):
7071 nic_mode_req = nic.get("mode", None)
7072 nic_mode = nic_mode_req
7073 if nic_mode is None:
7074 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7076 # in routed mode, for the first nic, the default ip is 'auto'
7077 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7078 default_ip_mode = constants.VALUE_AUTO
7080 default_ip_mode = constants.VALUE_NONE
7082 # ip validity checks
7083 ip = nic.get("ip", default_ip_mode)
7084 if ip is None or ip.lower() == constants.VALUE_NONE:
7086 elif ip.lower() == constants.VALUE_AUTO:
7087 if not self.op.name_check:
7088 raise errors.OpPrereqError("IP address set to auto but name checks"
7089 " have been skipped. Aborting.",
7091 nic_ip = self.hostname1.ip
7093 if not netutils.IsValidIP4(ip):
7094 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
7095 " like a valid IP" % ip,
7099 # TODO: check the ip address for uniqueness
7100 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7101 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7104 # MAC address verification
7105 mac = nic.get("mac", constants.VALUE_AUTO)
7106 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7107 mac = utils.NormalizeAndValidateMac(mac)
7110 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7111 except errors.ReservationError:
7112 raise errors.OpPrereqError("MAC address %s already in use"
7113 " in cluster" % mac,
7114 errors.ECODE_NOTUNIQUE)
7116 # bridge verification
7117 bridge = nic.get("bridge", None)
7118 link = nic.get("link", None)
7120 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7121 " at the same time", errors.ECODE_INVAL)
7122 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7123 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7130 nicparams[constants.NIC_MODE] = nic_mode_req
7132 nicparams[constants.NIC_LINK] = link
7134 check_params = cluster.SimpleFillNIC(nicparams)
7135 objects.NIC.CheckParameterSyntax(check_params)
7136 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7138 # disk checks/pre-build
7140 for disk in self.op.disks:
7141 mode = disk.get("mode", constants.DISK_RDWR)
7142 if mode not in constants.DISK_ACCESS_SET:
7143 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7144 mode, errors.ECODE_INVAL)
7145 size = disk.get("size", None)
7147 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7150 except (TypeError, ValueError):
7151 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7153 new_disk = {"size": size, "mode": mode}
7155 new_disk["adopt"] = disk["adopt"]
7156 self.disks.append(new_disk)
7158 if self.op.mode == constants.INSTANCE_IMPORT:
7160 # Check that the new instance doesn't have fewer disks than the export
7161 instance_disks = len(self.disks)
7162 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7163 if instance_disks < export_disks:
7164 raise errors.OpPrereqError("Not enough disks to import."
7165 " (instance: %d, export: %d)" %
7166 (instance_disks, export_disks),
7170 for idx in range(export_disks):
7171 option = 'disk%d_dump' % idx
7172 if export_info.has_option(constants.INISECT_INS, option):
7173 # FIXME: are the old os-es, disk sizes, etc. useful?
7174 export_name = export_info.get(constants.INISECT_INS, option)
7175 image = utils.PathJoin(self.op.src_path, export_name)
7176 disk_images.append(image)
7178 disk_images.append(False)
7180 self.src_images = disk_images
7182 old_name = export_info.get(constants.INISECT_INS, 'name')
7184 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7185 except (TypeError, ValueError), err:
7186 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7187 " an integer: %s" % str(err),
7189 if self.op.instance_name == old_name:
7190 for idx, nic in enumerate(self.nics):
7191 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7192 nic_mac_ini = 'nic%d_mac' % idx
7193 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7195 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7197 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7198 if self.op.ip_check:
7199 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7200 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7201 (self.check_ip, self.op.instance_name),
7202 errors.ECODE_NOTUNIQUE)
7204 #### mac address generation
7205 # By generating here the mac address both the allocator and the hooks get
7206 # the real final mac address rather than the 'auto' or 'generate' value.
7207 # There is a race condition between the generation and the instance object
7208 # creation, which means that we know the mac is valid now, but we're not
7209 # sure it will be when we actually add the instance. If things go bad
7210 # adding the instance will abort because of a duplicate mac, and the
7211 # creation job will fail.
7212 for nic in self.nics:
7213 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7214 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7218 if self.op.iallocator is not None:
7219 self._RunAllocator()
7221 #### node related checks
7223 # check primary node
7224 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7225 assert self.pnode is not None, \
7226 "Cannot retrieve locked node %s" % self.op.pnode
7228 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7229 pnode.name, errors.ECODE_STATE)
7231 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7232 pnode.name, errors.ECODE_STATE)
7234 self.secondaries = []
7236 # mirror node verification
7237 if self.op.disk_template in constants.DTS_NET_MIRROR:
7238 if self.op.snode == pnode.name:
7239 raise errors.OpPrereqError("The secondary node cannot be the"
7240 " primary node.", errors.ECODE_INVAL)
7241 _CheckNodeOnline(self, self.op.snode)
7242 _CheckNodeNotDrained(self, self.op.snode)
7243 self.secondaries.append(self.op.snode)
7245 nodenames = [pnode.name] + self.secondaries
7247 req_size = _ComputeDiskSize(self.op.disk_template,
7250 # Check lv size requirements, if not adopting
7251 if req_size is not None and not self.adopt_disks:
7252 _CheckNodesFreeDisk(self, nodenames, req_size)
7254 if self.adopt_disks: # instead, we must check the adoption data
7255 all_lvs = set([i["adopt"] for i in self.disks])
7256 if len(all_lvs) != len(self.disks):
7257 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7259 for lv_name in all_lvs:
7261 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7262 except errors.ReservationError:
7263 raise errors.OpPrereqError("LV named %s used by another instance" %
7264 lv_name, errors.ECODE_NOTUNIQUE)
7266 node_lvs = self.rpc.call_lv_list([pnode.name],
7267 self.cfg.GetVGName())[pnode.name]
7268 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7269 node_lvs = node_lvs.payload
7270 delta = all_lvs.difference(node_lvs.keys())
7272 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7273 utils.CommaJoin(delta),
7275 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7277 raise errors.OpPrereqError("Online logical volumes found, cannot"
7278 " adopt: %s" % utils.CommaJoin(online_lvs),
7280 # update the size of disk based on what is found
7281 for dsk in self.disks:
7282 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7284 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7286 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7287 # check OS parameters (remotely)
7288 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7290 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7292 # memory check on primary node
7294 _CheckNodeFreeMemory(self, self.pnode.name,
7295 "creating instance %s" % self.op.instance_name,
7296 self.be_full[constants.BE_MEMORY],
7299 self.dry_run_result = list(nodenames)
7301 def Exec(self, feedback_fn):
7302 """Create and add the instance to the cluster.
7305 instance = self.op.instance_name
7306 pnode_name = self.pnode.name
7308 ht_kind = self.op.hypervisor
7309 if ht_kind in constants.HTS_REQ_PORT:
7310 network_port = self.cfg.AllocatePort()
7314 if constants.ENABLE_FILE_STORAGE:
7315 # this is needed because os.path.join does not accept None arguments
7316 if self.op.file_storage_dir is None:
7317 string_file_storage_dir = ""
7319 string_file_storage_dir = self.op.file_storage_dir
7321 # build the full file storage dir path
7322 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7323 string_file_storage_dir, instance)
7325 file_storage_dir = ""
7327 disks = _GenerateDiskTemplate(self,
7328 self.op.disk_template,
7329 instance, pnode_name,
7333 self.op.file_driver,
7336 iobj = objects.Instance(name=instance, os=self.op.os_type,
7337 primary_node=pnode_name,
7338 nics=self.nics, disks=disks,
7339 disk_template=self.op.disk_template,
7341 network_port=network_port,
7342 beparams=self.op.beparams,
7343 hvparams=self.op.hvparams,
7344 hypervisor=self.op.hypervisor,
7345 osparams=self.op.osparams,
7348 if self.adopt_disks:
7349 # rename LVs to the newly-generated names; we need to construct
7350 # 'fake' LV disks with the old data, plus the new unique_id
7351 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7353 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7354 rename_to.append(t_dsk.logical_id)
7355 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7356 self.cfg.SetDiskID(t_dsk, pnode_name)
7357 result = self.rpc.call_blockdev_rename(pnode_name,
7358 zip(tmp_disks, rename_to))
7359 result.Raise("Failed to rename adopted LVs")
7361 feedback_fn("* creating instance disks...")
7363 _CreateDisks(self, iobj)
7364 except errors.OpExecError:
7365 self.LogWarning("Device creation failed, reverting...")
7367 _RemoveDisks(self, iobj)
7369 self.cfg.ReleaseDRBDMinors(instance)
7372 feedback_fn("adding instance %s to cluster config" % instance)
7374 self.cfg.AddInstance(iobj, self.proc.GetECId())
7376 # Declare that we don't want to remove the instance lock anymore, as we've
7377 # added the instance to the config
7378 del self.remove_locks[locking.LEVEL_INSTANCE]
7379 # Unlock all the nodes
7380 if self.op.mode == constants.INSTANCE_IMPORT:
7381 nodes_keep = [self.op.src_node]
7382 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7383 if node != self.op.src_node]
7384 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7385 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7387 self.context.glm.release(locking.LEVEL_NODE)
7388 del self.acquired_locks[locking.LEVEL_NODE]
7390 if self.op.wait_for_sync:
7391 disk_abort = not _WaitForSync(self, iobj)
7392 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7393 # make sure the disks are not degraded (still sync-ing is ok)
7395 feedback_fn("* checking mirrors status")
7396 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7401 _RemoveDisks(self, iobj)
7402 self.cfg.RemoveInstance(iobj.name)
7403 # Make sure the instance lock gets removed
7404 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7405 raise errors.OpExecError("There are some degraded disks for"
7408 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7409 if self.op.mode == constants.INSTANCE_CREATE:
7410 if not self.op.no_install:
7411 feedback_fn("* running the instance OS create scripts...")
7412 # FIXME: pass debug option from opcode to backend
7413 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7414 self.op.debug_level)
7415 result.Raise("Could not add os for instance %s"
7416 " on node %s" % (instance, pnode_name))
7418 elif self.op.mode == constants.INSTANCE_IMPORT:
7419 feedback_fn("* running the instance OS import scripts...")
7423 for idx, image in enumerate(self.src_images):
7427 # FIXME: pass debug option from opcode to backend
7428 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7429 constants.IEIO_FILE, (image, ),
7430 constants.IEIO_SCRIPT,
7431 (iobj.disks[idx], idx),
7433 transfers.append(dt)
7436 masterd.instance.TransferInstanceData(self, feedback_fn,
7437 self.op.src_node, pnode_name,
7438 self.pnode.secondary_ip,
7440 if not compat.all(import_result):
7441 self.LogWarning("Some disks for instance %s on node %s were not"
7442 " imported successfully" % (instance, pnode_name))
7444 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7445 feedback_fn("* preparing remote import...")
7446 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7447 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7449 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7450 self.source_x509_ca,
7451 self._cds, timeouts)
7452 if not compat.all(disk_results):
7453 # TODO: Should the instance still be started, even if some disks
7454 # failed to import (valid for local imports, too)?
7455 self.LogWarning("Some disks for instance %s on node %s were not"
7456 " imported successfully" % (instance, pnode_name))
7458 # Run rename script on newly imported instance
7459 assert iobj.name == instance
7460 feedback_fn("Running rename script for %s" % instance)
7461 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7462 self.source_instance_name,
7463 self.op.debug_level)
7465 self.LogWarning("Failed to run rename script for %s on node"
7466 " %s: %s" % (instance, pnode_name, result.fail_msg))
7469 # also checked in the prereq part
7470 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7474 iobj.admin_up = True
7475 self.cfg.Update(iobj, feedback_fn)
7476 logging.info("Starting instance %s on node %s", instance, pnode_name)
7477 feedback_fn("* starting instance...")
7478 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7479 result.Raise("Could not start instance")
7481 return list(iobj.all_nodes)
7484 class LUConnectConsole(NoHooksLU):
7485 """Connect to an instance's console.
7487 This is somewhat special in that it returns the command line that
7488 you need to run on the master node in order to connect to the
7497 def ExpandNames(self):
7498 self._ExpandAndLockInstance()
7500 def CheckPrereq(self):
7501 """Check prerequisites.
7503 This checks that the instance is in the cluster.
7506 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7507 assert self.instance is not None, \
7508 "Cannot retrieve locked instance %s" % self.op.instance_name
7509 _CheckNodeOnline(self, self.instance.primary_node)
7511 def Exec(self, feedback_fn):
7512 """Connect to the console of an instance
7515 instance = self.instance
7516 node = instance.primary_node
7518 node_insts = self.rpc.call_instance_list([node],
7519 [instance.hypervisor])[node]
7520 node_insts.Raise("Can't get node information from %s" % node)
7522 if instance.name not in node_insts.payload:
7523 raise errors.OpExecError("Instance %s is not running." % instance.name)
7525 logging.debug("Connecting to console of %s on %s", instance.name, node)
7527 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7528 cluster = self.cfg.GetClusterInfo()
7529 # beparams and hvparams are passed separately, to avoid editing the
7530 # instance and then saving the defaults in the instance itself.
7531 hvparams = cluster.FillHV(instance)
7532 beparams = cluster.FillBE(instance)
7533 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7536 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7539 class LUReplaceDisks(LogicalUnit):
7540 """Replace the disks of an instance.
7543 HPATH = "mirrors-replace"
7544 HTYPE = constants.HTYPE_INSTANCE
7547 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7548 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7549 ("remote_node", None, _TMaybeString),
7550 ("iallocator", None, _TMaybeString),
7551 ("early_release", False, _TBool),
7555 def CheckArguments(self):
7556 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7559 def ExpandNames(self):
7560 self._ExpandAndLockInstance()
7562 if self.op.iallocator is not None:
7563 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7565 elif self.op.remote_node is not None:
7566 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7567 self.op.remote_node = remote_node
7569 # Warning: do not remove the locking of the new secondary here
7570 # unless DRBD8.AddChildren is changed to work in parallel;
7571 # currently it doesn't since parallel invocations of
7572 # FindUnusedMinor will conflict
7573 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7574 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7577 self.needed_locks[locking.LEVEL_NODE] = []
7578 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7580 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7581 self.op.iallocator, self.op.remote_node,
7582 self.op.disks, False, self.op.early_release)
7584 self.tasklets = [self.replacer]
7586 def DeclareLocks(self, level):
7587 # If we're not already locking all nodes in the set we have to declare the
7588 # instance's primary/secondary nodes.
7589 if (level == locking.LEVEL_NODE and
7590 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7591 self._LockInstancesNodes()
7593 def BuildHooksEnv(self):
7596 This runs on the master, the primary and all the secondaries.
7599 instance = self.replacer.instance
7601 "MODE": self.op.mode,
7602 "NEW_SECONDARY": self.op.remote_node,
7603 "OLD_SECONDARY": instance.secondary_nodes[0],
7605 env.update(_BuildInstanceHookEnvByObject(self, instance))
7607 self.cfg.GetMasterNode(),
7608 instance.primary_node,
7610 if self.op.remote_node is not None:
7611 nl.append(self.op.remote_node)
7615 class TLReplaceDisks(Tasklet):
7616 """Replaces disks for an instance.
7618 Note: Locking is not within the scope of this class.
7621 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7622 disks, delay_iallocator, early_release):
7623 """Initializes this class.
7626 Tasklet.__init__(self, lu)
7629 self.instance_name = instance_name
7631 self.iallocator_name = iallocator_name
7632 self.remote_node = remote_node
7634 self.delay_iallocator = delay_iallocator
7635 self.early_release = early_release
7638 self.instance = None
7639 self.new_node = None
7640 self.target_node = None
7641 self.other_node = None
7642 self.remote_node_info = None
7643 self.node_secondary_ip = None
7646 def CheckArguments(mode, remote_node, iallocator):
7647 """Helper function for users of this class.
7650 # check for valid parameter combination
7651 if mode == constants.REPLACE_DISK_CHG:
7652 if remote_node is None and iallocator is None:
7653 raise errors.OpPrereqError("When changing the secondary either an"
7654 " iallocator script must be used or the"
7655 " new node given", errors.ECODE_INVAL)
7657 if remote_node is not None and iallocator is not None:
7658 raise errors.OpPrereqError("Give either the iallocator or the new"
7659 " secondary, not both", errors.ECODE_INVAL)
7661 elif remote_node is not None or iallocator is not None:
7662 # Not replacing the secondary
7663 raise errors.OpPrereqError("The iallocator and new node options can"
7664 " only be used when changing the"
7665 " secondary node", errors.ECODE_INVAL)
7668 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7669 """Compute a new secondary node using an IAllocator.
7672 ial = IAllocator(lu.cfg, lu.rpc,
7673 mode=constants.IALLOCATOR_MODE_RELOC,
7675 relocate_from=relocate_from)
7677 ial.Run(iallocator_name)
7680 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7681 " %s" % (iallocator_name, ial.info),
7684 if len(ial.result) != ial.required_nodes:
7685 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7686 " of nodes (%s), required %s" %
7688 len(ial.result), ial.required_nodes),
7691 remote_node_name = ial.result[0]
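# In IALLOCATOR_MODE_RELOC the allocator returns a list holding a single
# node name, e.g. ial.result == ["node3.example.com"] (hypothetical name,
# for illustration only).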
7693 lu.LogInfo("Selected new secondary for instance '%s': %s",
7694 instance_name, remote_node_name)
7696 return remote_node_name
7698 def _FindFaultyDisks(self, node_name):
7699 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7702 def CheckPrereq(self):
7703 """Check prerequisites.
7705 This checks that the instance is in the cluster.
7708 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7709 assert instance is not None, \
7710 "Cannot retrieve locked instance %s" % self.instance_name
7712 if instance.disk_template != constants.DT_DRBD8:
7713 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7714 " instances", errors.ECODE_INVAL)
7716 if len(instance.secondary_nodes) != 1:
7717 raise errors.OpPrereqError("The instance has a strange layout,"
7718 " expected one secondary but found %d" %
7719 len(instance.secondary_nodes),
7722 if not self.delay_iallocator:
7723 self._CheckPrereq2()
7725 def _CheckPrereq2(self):
7726 """Check prerequisites, second part.
7728 This function should always be part of CheckPrereq. It was separated and is
7729 now called from Exec because during node evacuation iallocator was only
7730 called with an unmodified cluster model, not taking planned changes into account.
7730 called with an unmodified cluster model, not taking planned changes into account.
7734 instance = self.instance
7735 secondary_node = instance.secondary_nodes[0]
7737 if self.iallocator_name is None:
7738 remote_node = self.remote_node
7740 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7741 instance.name, instance.secondary_nodes)
7743 if remote_node is not None:
7744 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7745 assert self.remote_node_info is not None, \
7746 "Cannot retrieve locked node %s" % remote_node
7748 self.remote_node_info = None
7750 if remote_node == self.instance.primary_node:
7751 raise errors.OpPrereqError("The specified node is the primary node of"
7752 " the instance.", errors.ECODE_INVAL)
7754 if remote_node == secondary_node:
7755 raise errors.OpPrereqError("The specified node is already the"
7756 " secondary node of the instance.",
7759 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7760 constants.REPLACE_DISK_CHG):
7761 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7764 if self.mode == constants.REPLACE_DISK_AUTO:
7765 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7766 faulty_secondary = self._FindFaultyDisks(secondary_node)
7768 if faulty_primary and faulty_secondary:
7769 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7770 " one node and can not be repaired"
7771 " automatically" % self.instance_name,
7775 self.disks = faulty_primary
7776 self.target_node = instance.primary_node
7777 self.other_node = secondary_node
7778 check_nodes = [self.target_node, self.other_node]
7779 elif faulty_secondary:
7780 self.disks = faulty_secondary
7781 self.target_node = secondary_node
7782 self.other_node = instance.primary_node
7783 check_nodes = [self.target_node, self.other_node]
7789 # Non-automatic modes
7790 if self.mode == constants.REPLACE_DISK_PRI:
7791 self.target_node = instance.primary_node
7792 self.other_node = secondary_node
7793 check_nodes = [self.target_node, self.other_node]
7795 elif self.mode == constants.REPLACE_DISK_SEC:
7796 self.target_node = secondary_node
7797 self.other_node = instance.primary_node
7798 check_nodes = [self.target_node, self.other_node]
7800 elif self.mode == constants.REPLACE_DISK_CHG:
7801 self.new_node = remote_node
7802 self.other_node = instance.primary_node
7803 self.target_node = secondary_node
7804 check_nodes = [self.new_node, self.other_node]
7806 _CheckNodeNotDrained(self.lu, remote_node)
7808 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7809 assert old_node_info is not None
7810 if old_node_info.offline and not self.early_release:
7811 # doesn't make sense to delay the release
7812 self.early_release = True
7813 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7814 " early-release mode", secondary_node)
7817 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7820 # If not specified all disks should be replaced
7822 self.disks = range(len(self.instance.disks))
7824 for node in check_nodes:
7825 _CheckNodeOnline(self.lu, node)
7827 # Check whether disks are valid
7828 for disk_idx in self.disks:
7829 instance.FindDisk(disk_idx)
7831 # Get secondary node IP addresses
7834 for node_name in [self.target_node, self.other_node, self.new_node]:
7835 if node_name is not None:
7836 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7838 self.node_secondary_ip = node_2nd_ip
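# node_secondary_ip maps node names to their secondary (replication) IP
# addresses, e.g. {"node1": "192.0.2.10", "node2": "192.0.2.11"}
# (hypothetical values, for illustration only).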
7840 def Exec(self, feedback_fn):
7841 """Execute disk replacement.
7843 This dispatches the disk replacement to the appropriate handler.
7846 if self.delay_iallocator:
7847 self._CheckPrereq2()
7850 feedback_fn("No disks need replacement")
7853 feedback_fn("Replacing disk(s) %s for %s" %
7854 (utils.CommaJoin(self.disks), self.instance.name))
7856 activate_disks = (not self.instance.admin_up)
7858 # Activate the instance disks if we're replacing them on a down instance
7860 _StartInstanceDisks(self.lu, self.instance, True)
7863 # Should we replace the secondary node?
7864 if self.new_node is not None:
7865 fn = self._ExecDrbd8Secondary
7867 fn = self._ExecDrbd8DiskOnly
7869 return fn(feedback_fn)
7872 # Deactivate the instance disks if we're replacing them on a down instance
7875 _SafeShutdownInstanceDisks(self.lu, self.instance)
7877 def _CheckVolumeGroup(self, nodes):
7878 self.lu.LogInfo("Checking volume groups")
7880 vgname = self.cfg.GetVGName()
7882 # Make sure volume group exists on all involved nodes
7883 results = self.rpc.call_vg_list(nodes)
7885 raise errors.OpExecError("Can't list volume groups on the nodes")
7889 res.Raise("Error checking node %s" % node)
7890 if vgname not in res.payload:
7891 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7894 def _CheckDisksExistence(self, nodes):
7895 # Check disk existence
7896 for idx, dev in enumerate(self.instance.disks):
7897 if idx not in self.disks:
7901 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7902 self.cfg.SetDiskID(dev, node)
7904 result = self.rpc.call_blockdev_find(node, dev)
7906 msg = result.fail_msg
7907 if msg or not result.payload:
7909 msg = "disk not found"
7910 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7913 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7914 for idx, dev in enumerate(self.instance.disks):
7915 if idx not in self.disks:
7918 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7921 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7923 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7924 " replace disks for instance %s" %
7925 (node_name, self.instance.name))
7927 def _CreateNewStorage(self, node_name):
7928 vgname = self.cfg.GetVGName()
7931 for idx, dev in enumerate(self.instance.disks):
7932 if idx not in self.disks:
7935 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7937 self.cfg.SetDiskID(dev, node_name)
7939 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7940 names = _GenerateUniqueNames(self.lu, lv_names)
7942 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7943 logical_id=(vgname, names[0]))
7944 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7945 logical_id=(vgname, names[1]))
7947 new_lvs = [lv_data, lv_meta]
7948 old_lvs = dev.children
7949 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
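# iv_names maps the DRBD device name to a (drbd_dev, old_lvs, new_lvs)
# tuple, roughly {"disk/0": (<drbd disk>, [<old data LV>, <old meta LV>],
# [<new data LV>, <new meta LV>])} (illustrative shape only).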
7951 # we pass force_create=True to force the LVM creation
7952 for new_lv in new_lvs:
7953 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7954 _GetInstanceInfoText(self.instance), False)
7958 def _CheckDevices(self, node_name, iv_names):
7959 for name, (dev, _, _) in iv_names.iteritems():
7960 self.cfg.SetDiskID(dev, node_name)
7962 result = self.rpc.call_blockdev_find(node_name, dev)
7964 msg = result.fail_msg
7965 if msg or not result.payload:
7967 msg = "disk not found"
7968 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7971 if result.payload.is_degraded:
7972 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7974 def _RemoveOldStorage(self, node_name, iv_names):
7975 for name, (_, old_lvs, _) in iv_names.iteritems():
7976 self.lu.LogInfo("Remove logical volumes for %s" % name)
7979 self.cfg.SetDiskID(lv, node_name)
7981 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7983 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7984 hint="remove unused LVs manually")
7986 def _ReleaseNodeLock(self, node_name):
7987 """Releases the lock for a given node."""
7988 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7990 def _ExecDrbd8DiskOnly(self, feedback_fn):
7991 """Replace a disk on the primary or secondary for DRBD 8.
7993 The algorithm for replace is quite complicated:
7995 1. for each disk to be replaced:
7997 1. create new LVs on the target node with unique names
7998 1. detach old LVs from the drbd device
7999 1. rename old LVs to name_replaced.<time_t>
8000 1. rename new LVs to old LVs
8001 1. attach the new LVs (with the old names now) to the drbd device
8003 1. wait for sync across all devices
8005 1. for each modified disk:
8007 1. remove old LVs (which have the name name_replaced.<time_t>)
8009 Failures are not very well handled.
8014 # Step: check device activation
8015 self.lu.LogStep(1, steps_total, "Check device existence")
8016 self._CheckDisksExistence([self.other_node, self.target_node])
8017 self._CheckVolumeGroup([self.target_node, self.other_node])
8019 # Step: check other node consistency
8020 self.lu.LogStep(2, steps_total, "Check peer consistency")
8021 self._CheckDisksConsistency(self.other_node,
8022 self.other_node == self.instance.primary_node,
8025 # Step: create new storage
8026 self.lu.LogStep(3, steps_total, "Allocate new storage")
8027 iv_names = self._CreateNewStorage(self.target_node)
8029 # Step: for each lv, detach+rename*2+attach
8030 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8031 for dev, old_lvs, new_lvs in iv_names.itervalues():
8032 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8034 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8036 result.Raise("Can't detach drbd from local storage on node"
8037 " %s for device %s" % (self.target_node, dev.iv_name))
8039 #cfg.Update(instance)
8041 # ok, we created the new LVs, so now we know we have the needed
8042 # storage; as such, we proceed on the target node to rename
8043 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8044 # using the assumption that logical_id == physical_id (which in
8045 # turn is the unique_id on that node)
8047 # FIXME(iustin): use a better name for the replaced LVs
8048 temp_suffix = int(time.time())
8049 ren_fn = lambda d, suff: (d.physical_id[0],
8050 d.physical_id[1] + "_replaced-%s" % suff)
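# Illustration (hypothetical names): an old LV ("xenvg", ".disk0_data")
# would be renamed to ("xenvg", ".disk0_data_replaced-1318944991"), where
# the suffix is the int(time.time()) value computed above.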
8052 # Build the rename list based on what LVs exist on the node
8053 rename_old_to_new = []
8054 for to_ren in old_lvs:
8055 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8056 if not result.fail_msg and result.payload:
8058 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8060 self.lu.LogInfo("Renaming the old LVs on the target node")
8061 result = self.rpc.call_blockdev_rename(self.target_node,
8063 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8065 # Now we rename the new LVs to the old LVs
8066 self.lu.LogInfo("Renaming the new LVs on the target node")
8067 rename_new_to_old = [(new, old.physical_id)
8068 for old, new in zip(old_lvs, new_lvs)]
8069 result = self.rpc.call_blockdev_rename(self.target_node,
8071 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8073 for old, new in zip(old_lvs, new_lvs):
8074 new.logical_id = old.logical_id
8075 self.cfg.SetDiskID(new, self.target_node)
8077 for disk in old_lvs:
8078 disk.logical_id = ren_fn(disk, temp_suffix)
8079 self.cfg.SetDiskID(disk, self.target_node)
8081 # Now that the new lvs have the old name, we can add them to the device
8082 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8083 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8085 msg = result.fail_msg
8087 for new_lv in new_lvs:
8088 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8091 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8092 hint=("cleanup manually the unused logical"
8094 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8096 dev.children = new_lvs
8098 self.cfg.Update(self.instance, feedback_fn)
8101 if self.early_release:
8102 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8104 self._RemoveOldStorage(self.target_node, iv_names)
8105 # WARNING: we release both node locks here, do not do other RPCs
8106 # than WaitForSync to the primary node
8107 self._ReleaseNodeLock([self.target_node, self.other_node])
8110 # This can fail as the old devices are degraded and _WaitForSync
8111 # does a combined result over all disks, so we don't check its return value
8112 self.lu.LogStep(cstep, steps_total, "Sync devices")
8114 _WaitForSync(self.lu, self.instance)
8116 # Check all devices manually
8117 self._CheckDevices(self.instance.primary_node, iv_names)
8119 # Step: remove old storage
8120 if not self.early_release:
8121 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8123 self._RemoveOldStorage(self.target_node, iv_names)
8125 def _ExecDrbd8Secondary(self, feedback_fn):
8126 """Replace the secondary node for DRBD 8.
8128 The algorithm for replace is quite complicated:
8129 - for all disks of the instance:
8130 - create new LVs on the new node with same names
8131 - shutdown the drbd device on the old secondary
8132 - disconnect the drbd network on the primary
8133 - create the drbd device on the new secondary
8134 - network attach the drbd on the primary, using an artifice:
8135 the drbd code for Attach() will connect to the network if it
8136 finds a device which is connected to the good local disks but not network enabled
8138 - wait for sync across all devices
8139 - remove all disks from the old secondary
8141 Failures are not very well handled.
8146 # Step: check device activation
8147 self.lu.LogStep(1, steps_total, "Check device existence")
8148 self._CheckDisksExistence([self.instance.primary_node])
8149 self._CheckVolumeGroup([self.instance.primary_node])
8151 # Step: check other node consistency
8152 self.lu.LogStep(2, steps_total, "Check peer consistency")
8153 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8155 # Step: create new storage
8156 self.lu.LogStep(3, steps_total, "Allocate new storage")
8157 for idx, dev in enumerate(self.instance.disks):
8158 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8159 (self.new_node, idx))
8160 # we pass force_create=True to force LVM creation
8161 for new_lv in dev.children:
8162 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8163 _GetInstanceInfoText(self.instance), False)
8165 # Step 4: drbd minor and setup changes
8166 # after this, we must manually remove the drbd minors on both the
8167 # error and the success paths
8168 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8169 minors = self.cfg.AllocateDRBDMinor([self.new_node
8170 for dev in self.instance.disks],
8172 logging.debug("Allocated minors %r", minors)
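# minors is a list with one new DRBD minor number per instance disk,
# e.g. [2, 3] for a two-disk instance (illustrative values only).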
8175 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8176 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8177 (self.new_node, idx))
8178 # create new devices on new_node; note that we create two IDs:
8179 # one without port, so the drbd will be activated without
8180 # networking information on the new node at this stage, and one
8181 # with network, for the latter activation in step 4
8182 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8183 if self.instance.primary_node == o_node1:
8186 assert self.instance.primary_node == o_node2, "Three-node instance?"
8189 new_alone_id = (self.instance.primary_node, self.new_node, None,
8190 p_minor, new_minor, o_secret)
8191 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8192 p_minor, new_minor, o_secret)
8194 iv_names[idx] = (dev, dev.children, new_net_id)
8195 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8197 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8198 logical_id=new_alone_id,
8199 children=dev.children,
8202 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8203 _GetInstanceInfoText(self.instance), False)
8204 except errors.GenericError:
8205 self.cfg.ReleaseDRBDMinors(self.instance.name)
8208 # We have new devices, shutdown the drbd on the old secondary
8209 for idx, dev in enumerate(self.instance.disks):
8210 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8211 self.cfg.SetDiskID(dev, self.target_node)
8212 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8214 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8215 "node: %s" % (idx, msg),
8216 hint=("Please cleanup this device manually as"
8217 " soon as possible"))
8219 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8220 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8221 self.node_secondary_ip,
8222 self.instance.disks)\
8223 [self.instance.primary_node]
8225 msg = result.fail_msg
8227 # detaches didn't succeed (unlikely)
8228 self.cfg.ReleaseDRBDMinors(self.instance.name)
8229 raise errors.OpExecError("Can't detach the disks from the network on"
8230 " old node: %s" % (msg,))
8232 # if we managed to detach at least one, we update all the disks of
8233 # the instance to point to the new secondary
8234 self.lu.LogInfo("Updating instance configuration")
8235 for dev, _, new_logical_id in iv_names.itervalues():
8236 dev.logical_id = new_logical_id
8237 self.cfg.SetDiskID(dev, self.instance.primary_node)
8239 self.cfg.Update(self.instance, feedback_fn)
8241 # and now perform the drbd attach
8242 self.lu.LogInfo("Attaching primary drbds to new secondary"
8243 " (standalone => connected)")
8244 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8246 self.node_secondary_ip,
8247 self.instance.disks,
8250 for to_node, to_result in result.items():
8251 msg = to_result.fail_msg
8253 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8255 hint=("please do a gnt-instance info to see the"
8256 " status of disks"))
8258 if self.early_release:
8259 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8261 self._RemoveOldStorage(self.target_node, iv_names)
8262 # WARNING: we release all node locks here, do not do other RPCs
8263 # than WaitForSync to the primary node
8264 self._ReleaseNodeLock([self.instance.primary_node,
8269 # This can fail as the old devices are degraded and _WaitForSync
8270 # does a combined result over all disks, so we don't check its return value
8271 self.lu.LogStep(cstep, steps_total, "Sync devices")
8273 _WaitForSync(self.lu, self.instance)
8275 # Check all devices manually
8276 self._CheckDevices(self.instance.primary_node, iv_names)
8278 # Step: remove old storage
8279 if not self.early_release:
8280 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8281 self._RemoveOldStorage(self.target_node, iv_names)
8284 class LURepairNodeStorage(NoHooksLU):
8285 """Repairs the volume group on a node.
8290 ("storage_type", _NoDefault, _CheckStorageType),
8291 ("name", _NoDefault, _TNonEmptyString),
8292 ("ignore_consistency", False, _TBool),
8296 def CheckArguments(self):
8297 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8299 storage_type = self.op.storage_type
8301 if (constants.SO_FIX_CONSISTENCY not in
8302 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8303 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8304 " repaired" % storage_type,
8307 def ExpandNames(self):
8308 self.needed_locks = {
8309 locking.LEVEL_NODE: [self.op.node_name],
8312 def _CheckFaultyDisks(self, instance, node_name):
8313 """Ensure faulty disks abort the opcode or at least warn."""
8315 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8317 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8318 " node '%s'" % (instance.name, node_name),
8320 except errors.OpPrereqError, err:
8321 if self.op.ignore_consistency:
8322 self.proc.LogWarning(str(err.args[0]))
8326 def CheckPrereq(self):
8327 """Check prerequisites.
8330 # Check whether any instance on this node has faulty disks
8331 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8332 if not inst.admin_up:
8334 check_nodes = set(inst.all_nodes)
8335 check_nodes.discard(self.op.node_name)
8336 for inst_node_name in check_nodes:
8337 self._CheckFaultyDisks(inst, inst_node_name)
8339 def Exec(self, feedback_fn):
8340 feedback_fn("Repairing storage unit '%s' on %s ..." %
8341 (self.op.name, self.op.node_name))
8343 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8344 result = self.rpc.call_storage_execute(self.op.node_name,
8345 self.op.storage_type, st_args,
8347 constants.SO_FIX_CONSISTENCY)
8348 result.Raise("Failed to repair storage unit '%s' on %s" %
8349 (self.op.name, self.op.node_name))
8352 class LUNodeEvacuationStrategy(NoHooksLU):
8353 """Computes the node evacuation strategy.
8357 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8358 ("remote_node", None, _TMaybeString),
8359 ("iallocator", None, _TMaybeString),
8363 def CheckArguments(self):
8364 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8366 def ExpandNames(self):
8367 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8368 self.needed_locks = locks = {}
8369 if self.op.remote_node is None:
8370 locks[locking.LEVEL_NODE] = locking.ALL_SET
8372 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8373 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8375 def Exec(self, feedback_fn):
8376 if self.op.remote_node is not None:
8378 for node in self.op.nodes:
8379 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8382 if i.primary_node == self.op.remote_node:
8383 raise errors.OpPrereqError("Node %s is the primary node of"
8384 " instance %s, cannot use it as"
8386 (self.op.remote_node, i.name),
8388 result.append([i.name, self.op.remote_node])
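# Each appended entry is an [instance_name, new_secondary_node] pair,
# e.g. ["instance1.example.com", "node2.example.com"] (hypothetical names,
# for illustration only).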
8390 ial = IAllocator(self.cfg, self.rpc,
8391 mode=constants.IALLOCATOR_MODE_MEVAC,
8392 evac_nodes=self.op.nodes)
8393 ial.Run(self.op.iallocator, validate=True)
8395 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8401 class LUGrowDisk(LogicalUnit):
8402 """Grow a disk of an instance.
8406 HTYPE = constants.HTYPE_INSTANCE
8409 ("disk", _NoDefault, _TInt),
8410 ("amount", _NoDefault, _TInt),
8411 ("wait_for_sync", True, _TBool),
8415 def ExpandNames(self):
8416 self._ExpandAndLockInstance()
8417 self.needed_locks[locking.LEVEL_NODE] = []
8418 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8420 def DeclareLocks(self, level):
8421 if level == locking.LEVEL_NODE:
8422 self._LockInstancesNodes()
8424 def BuildHooksEnv(self):
8427 This runs on the master, the primary and all the secondaries.
8431 "DISK": self.op.disk,
8432 "AMOUNT": self.op.amount,
8434 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8435 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8438 def CheckPrereq(self):
8439 """Check prerequisites.
8441 This checks that the instance is in the cluster.
8444 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8445 assert instance is not None, \
8446 "Cannot retrieve locked instance %s" % self.op.instance_name
8447 nodenames = list(instance.all_nodes)
8448 for node in nodenames:
8449 _CheckNodeOnline(self, node)
8451 self.instance = instance
8453 if instance.disk_template not in constants.DTS_GROWABLE:
8454 raise errors.OpPrereqError("Instance's disk layout does not support"
8455 " growing.", errors.ECODE_INVAL)
8457 self.disk = instance.FindDisk(self.op.disk)
8459 if instance.disk_template != constants.DT_FILE:
8460 # TODO: check the free disk space for file, when that feature will be
8462 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8464 def Exec(self, feedback_fn):
8465 """Execute disk grow.
8468 instance = self.instance
8471 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8473 raise errors.OpExecError("Cannot activate block device to grow")
8475 for node in instance.all_nodes:
8476 self.cfg.SetDiskID(disk, node)
8477 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8478 result.Raise("Grow request failed to node %s" % node)
8480 # TODO: Rewrite code to work properly
8481 # DRBD goes into sync mode for a short amount of time after executing the
8482 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8483 # calling "resize" in sync mode fails. Sleeping for a short amount of
8484 # time is a work-around.
8487 disk.RecordGrow(self.op.amount)
8488 self.cfg.Update(instance, feedback_fn)
8489 if self.op.wait_for_sync:
8490 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8492 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8493 " status.\nPlease check the instance.")
8494 if not instance.admin_up:
8495 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8496 elif not instance.admin_up:
8497 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8498 " not supposed to be running because no wait for"
8499 " sync mode was requested.")
8502 class LUQueryInstanceData(NoHooksLU):
8503 """Query runtime instance data.
8507 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8508 ("static", False, _TBool),
8512 def ExpandNames(self):
8513 self.needed_locks = {}
8514 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8516 if self.op.instances:
8517 self.wanted_names = []
8518 for name in self.op.instances:
8519 full_name = _ExpandInstanceName(self.cfg, name)
8520 self.wanted_names.append(full_name)
8521 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8523 self.wanted_names = None
8524 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8526 self.needed_locks[locking.LEVEL_NODE] = []
8527 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8529 def DeclareLocks(self, level):
8530 if level == locking.LEVEL_NODE:
8531 self._LockInstancesNodes()
8533 def CheckPrereq(self):
8534 """Check prerequisites.
8536 This only checks the optional instance list against the existing names.
8539 if self.wanted_names is None:
8540 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8542 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8543 in self.wanted_names]
8545 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8546 """Returns the status of a block device
8549 if self.op.static or not node:
8552 self.cfg.SetDiskID(dev, node)
8554 result = self.rpc.call_blockdev_find(node, dev)
8558 result.Raise("Can't compute disk status for %s" % instance_name)
8560 status = result.payload
8564 return (status.dev_path, status.major, status.minor,
8565 status.sync_percent, status.estimated_time,
8566 status.is_degraded, status.ldisk_status)
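# The returned tuple is (dev_path, major, minor, sync_percent,
# estimated_time, is_degraded, ldisk_status), e.g.
# ("/dev/drbd0", 147, 0, 90.5, 120, False, None) -- hypothetical values,
# for illustration only.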
8568 def _ComputeDiskStatus(self, instance, snode, dev):
8569 """Compute block device status.
8572 if dev.dev_type in constants.LDS_DRBD:
8573 # we change the snode then (otherwise we use the one passed in)
8574 if dev.logical_id[0] == instance.primary_node:
8575 snode = dev.logical_id[1]
8577 snode = dev.logical_id[0]
8579 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8581 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8584 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8585 for child in dev.children]
8590 "iv_name": dev.iv_name,
8591 "dev_type": dev.dev_type,
8592 "logical_id": dev.logical_id,
8593 "physical_id": dev.physical_id,
8594 "pstatus": dev_pstatus,
8595 "sstatus": dev_sstatus,
8596 "children": dev_children,
8603 def Exec(self, feedback_fn):
8604 """Gather and return data"""
8607 cluster = self.cfg.GetClusterInfo()
8609 for instance in self.wanted_instances:
8610 if not self.op.static:
8611 remote_info = self.rpc.call_instance_info(instance.primary_node,
8613 instance.hypervisor)
8614 remote_info.Raise("Error checking node %s" % instance.primary_node)
8615 remote_info = remote_info.payload
8616 if remote_info and "state" in remote_info:
8619 remote_state = "down"
8622 if instance.admin_up:
8625 config_state = "down"
8627 disks = [self._ComputeDiskStatus(instance, None, device)
8628 for device in instance.disks]
8631 "name": instance.name,
8632 "config_state": config_state,
8633 "run_state": remote_state,
8634 "pnode": instance.primary_node,
8635 "snodes": instance.secondary_nodes,
8637 # this happens to be the same format used for hooks
8638 "nics": _NICListToTuple(self, instance.nics),
8639 "disk_template": instance.disk_template,
8641 "hypervisor": instance.hypervisor,
8642 "network_port": instance.network_port,
8643 "hv_instance": instance.hvparams,
8644 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8645 "be_instance": instance.beparams,
8646 "be_actual": cluster.FillBE(instance),
8647 "os_instance": instance.osparams,
8648 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8649 "serial_no": instance.serial_no,
8650 "mtime": instance.mtime,
8651 "ctime": instance.ctime,
8652 "uuid": instance.uuid,
8655 result[instance.name] = idict
8660 class LUSetInstanceParams(LogicalUnit):
8661 """Modifies an instances's parameters.
8664 HPATH = "instance-modify"
8665 HTYPE = constants.HTYPE_INSTANCE
8668 ("nics", _EmptyList, _TList),
8669 ("disks", _EmptyList, _TList),
8670 ("beparams", _EmptyDict, _TDict),
8671 ("hvparams", _EmptyDict, _TDict),
8672 ("disk_template", None, _TMaybeString),
8673 ("remote_node", None, _TMaybeString),
8674 ("os_name", None, _TMaybeString),
8675 ("force_variant", False, _TBool),
8676 ("osparams", None, _TOr(_TDict, _TNone)),
8681 def CheckArguments(self):
8682 if not (self.op.nics or self.op.disks or self.op.disk_template or
8683 self.op.hvparams or self.op.beparams or self.op.os_name):
8684 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8686 if self.op.hvparams:
8687 _CheckGlobalHvParams(self.op.hvparams)
8691 for disk_op, disk_dict in self.op.disks:
8692 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8693 if disk_op == constants.DDM_REMOVE:
8696 elif disk_op == constants.DDM_ADD:
8699 if not isinstance(disk_op, int):
8700 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8701 if not isinstance(disk_dict, dict):
8702 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8703 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8705 if disk_op == constants.DDM_ADD:
8706 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8707 if mode not in constants.DISK_ACCESS_SET:
8708 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8710 size = disk_dict.get('size', None)
8712 raise errors.OpPrereqError("Required disk parameter size missing",
8716 except (TypeError, ValueError), err:
8717 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8718 str(err), errors.ECODE_INVAL)
8719 disk_dict['size'] = size
8721 # modification of disk
8722 if 'size' in disk_dict:
8723 raise errors.OpPrereqError("Disk size change not possible, use"
8724 " grow-disk", errors.ECODE_INVAL)
8726 if disk_addremove > 1:
8727 raise errors.OpPrereqError("Only one disk add or remove operation"
8728 " supported at a time", errors.ECODE_INVAL)
8730 if self.op.disks and self.op.disk_template is not None:
8731 raise errors.OpPrereqError("Disk template conversion and other disk"
8732 " changes not supported at the same time",
8735 if self.op.disk_template:
8736 _CheckDiskTemplate(self.op.disk_template)
8737 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8738 self.op.remote_node is None):
8739 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8740 " one requires specifying a secondary node",
8745 for nic_op, nic_dict in self.op.nics:
8746 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8747 if nic_op == constants.DDM_REMOVE:
8750 elif nic_op == constants.DDM_ADD:
8753 if not isinstance(nic_op, int):
8754 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8755 if not isinstance(nic_dict, dict):
8756 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8757 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8759 # nic_dict should be a dict
8760 nic_ip = nic_dict.get('ip', None)
8761 if nic_ip is not None:
8762 if nic_ip.lower() == constants.VALUE_NONE:
8763 nic_dict['ip'] = None
8765 if not netutils.IsValidIP4(nic_ip):
8766 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8769 nic_bridge = nic_dict.get('bridge', None)
8770 nic_link = nic_dict.get('link', None)
8771 if nic_bridge and nic_link:
8772 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8773 " at the same time", errors.ECODE_INVAL)
8774 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8775 nic_dict['bridge'] = None
8776 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8777 nic_dict['link'] = None
8779 if nic_op == constants.DDM_ADD:
8780 nic_mac = nic_dict.get('mac', None)
8782 nic_dict['mac'] = constants.VALUE_AUTO
8784 if 'mac' in nic_dict:
8785 nic_mac = nic_dict['mac']
8786 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8787 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8789 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8790 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8791 " modifying an existing nic",
8794 if nic_addremove > 1:
8795 raise errors.OpPrereqError("Only one NIC add or remove operation"
8796 " supported at a time", errors.ECODE_INVAL)
8798 def ExpandNames(self):
8799 self._ExpandAndLockInstance()
8800 self.needed_locks[locking.LEVEL_NODE] = []
8801 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8803 def DeclareLocks(self, level):
8804 if level == locking.LEVEL_NODE:
8805 self._LockInstancesNodes()
8806 if self.op.disk_template and self.op.remote_node:
8807 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8808 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8810 def BuildHooksEnv(self):
8813 This runs on the master, primary and secondaries.
8817 if constants.BE_MEMORY in self.be_new:
8818 args['memory'] = self.be_new[constants.BE_MEMORY]
8819 if constants.BE_VCPUS in self.be_new:
8820 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8821 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8822 # information at all.
8825 nic_override = dict(self.op.nics)
8826 for idx, nic in enumerate(self.instance.nics):
8827 if idx in nic_override:
8828 this_nic_override = nic_override[idx]
8830 this_nic_override = {}
8831 if 'ip' in this_nic_override:
8832 ip = this_nic_override['ip']
8835 if 'mac' in this_nic_override:
8836 mac = this_nic_override['mac']
8839 if idx in self.nic_pnew:
8840 nicparams = self.nic_pnew[idx]
8842 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8843 mode = nicparams[constants.NIC_MODE]
8844 link = nicparams[constants.NIC_LINK]
8845 args['nics'].append((ip, mac, mode, link))
8846 if constants.DDM_ADD in nic_override:
8847 ip = nic_override[constants.DDM_ADD].get('ip', None)
8848 mac = nic_override[constants.DDM_ADD]['mac']
8849 nicparams = self.nic_pnew[constants.DDM_ADD]
8850 mode = nicparams[constants.NIC_MODE]
8851 link = nicparams[constants.NIC_LINK]
8852 args['nics'].append((ip, mac, mode, link))
8853 elif constants.DDM_REMOVE in nic_override:
8854 del args['nics'][-1]
8856 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8857 if self.op.disk_template:
8858 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8859 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8862 def CheckPrereq(self):
8863 """Check prerequisites.
8865 This only checks the instance list against the existing names.
8868 # checking the new params on the primary/secondary nodes
8870 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8871 cluster = self.cluster = self.cfg.GetClusterInfo()
8872 assert self.instance is not None, \
8873 "Cannot retrieve locked instance %s" % self.op.instance_name
8874 pnode = instance.primary_node
8875 nodelist = list(instance.all_nodes)
8878 if self.op.os_name and not self.op.force:
8879 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8880 self.op.force_variant)
8881 instance_os = self.op.os_name
8883 instance_os = instance.os
8885 if self.op.disk_template:
8886 if instance.disk_template == self.op.disk_template:
8887 raise errors.OpPrereqError("Instance already has disk template %s" %
8888 instance.disk_template, errors.ECODE_INVAL)
8890 if (instance.disk_template,
8891 self.op.disk_template) not in self._DISK_CONVERSIONS:
8892 raise errors.OpPrereqError("Unsupported disk template conversion from"
8893 " %s to %s" % (instance.disk_template,
8894 self.op.disk_template),
8896 _CheckInstanceDown(self, instance, "cannot change disk template")
8897 if self.op.disk_template in constants.DTS_NET_MIRROR:
8898 if self.op.remote_node == pnode:
8899 raise errors.OpPrereqError("Given new secondary node %s is the same"
8900 " as the primary node of the instance" %
8901 self.op.remote_node, errors.ECODE_STATE)
8902 _CheckNodeOnline(self, self.op.remote_node)
8903 _CheckNodeNotDrained(self, self.op.remote_node)
8904 disks = [{"size": d.size} for d in instance.disks]
8905 required = _ComputeDiskSize(self.op.disk_template, disks)
8906 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8908 # hvparams processing
8909 if self.op.hvparams:
8910 hv_type = instance.hypervisor
8911 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8912 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8913 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8916 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8917 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8918 self.hv_new = hv_new # the new actual values
8919 self.hv_inst = i_hvdict # the new dict (without defaults)
8921 self.hv_new = self.hv_inst = {}
8923 # beparams processing
8924 if self.op.beparams:
8925 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8927 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8928 be_new = cluster.SimpleFillBE(i_bedict)
8929 self.be_new = be_new # the new actual values
8930 self.be_inst = i_bedict # the new dict (without defaults)
8932 self.be_new = self.be_inst = {}
8934 # osparams processing
8935 if self.op.osparams:
8936 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8937 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8938 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8939 self.os_inst = i_osdict # the new dict (without defaults)
8941 self.os_new = self.os_inst = {}
8945 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8946 mem_check_list = [pnode]
8947 if be_new[constants.BE_AUTO_BALANCE]:
8948 # either we changed auto_balance to yes or it was already set before
8949 mem_check_list.extend(instance.secondary_nodes)
8950 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8951 instance.hypervisor)
8952 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8953 instance.hypervisor)
8954 pninfo = nodeinfo[pnode]
8955 msg = pninfo.fail_msg
8957 # Assume the primary node is unreachable and go ahead
8958 self.warn.append("Can't get info from primary node %s: %s" %
8960 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8961 self.warn.append("Node data from primary node %s doesn't contain"
8962 " free memory information" % pnode)
8963 elif instance_info.fail_msg:
8964 self.warn.append("Can't get instance runtime information: %s" %
8965 instance_info.fail_msg)
8967 if instance_info.payload:
8968 current_mem = int(instance_info.payload['memory'])
8970 # Assume instance not running
8971 # (there is a slight race condition here, but it's not very probable,
8972 # and we have no other way to check)
8974 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8975 pninfo.payload['memory_free'])
8977 raise errors.OpPrereqError("This change will prevent the instance"
8978 " from starting, due to %d MB of memory"
8979 " missing on its primary node" % miss_mem,
8982 if be_new[constants.BE_AUTO_BALANCE]:
8983 for node, nres in nodeinfo.items():
8984 if node not in instance.secondary_nodes:
8988 self.warn.append("Can't get info from secondary node %s: %s" %
8990 elif not isinstance(nres.payload.get('memory_free', None), int):
8991 self.warn.append("Secondary node %s didn't return free"
8992 " memory information" % node)
8993 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8994 self.warn.append("Not enough memory to failover instance to"
8995 " secondary node %s" % node)
9000 for nic_op, nic_dict in self.op.nics:
9001 if nic_op == constants.DDM_REMOVE:
9002 if not instance.nics:
9003 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9006 if nic_op != constants.DDM_ADD:
9008 if not instance.nics:
9009 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9010 " no NICs" % nic_op,
9012 if nic_op < 0 or nic_op >= len(instance.nics):
9013 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9015 (nic_op, len(instance.nics) - 1),
9017 old_nic_params = instance.nics[nic_op].nicparams
9018 old_nic_ip = instance.nics[nic_op].ip
9023 update_params_dict = dict([(key, nic_dict[key])
9024 for key in constants.NICS_PARAMETERS
9025 if key in nic_dict])
9027 if 'bridge' in nic_dict:
9028 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9030 new_nic_params = _GetUpdatedParams(old_nic_params,
9032 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9033 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9034 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9035 self.nic_pinst[nic_op] = new_nic_params
9036 self.nic_pnew[nic_op] = new_filled_nic_params
9037 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9039 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9040 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9041 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9043 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9045 self.warn.append(msg)
9047 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9048 if new_nic_mode == constants.NIC_MODE_ROUTED:
9049 if 'ip' in nic_dict:
9050 nic_ip = nic_dict['ip']
9054 raise errors.OpPrereqError('Cannot set the nic ip to None'
9055 ' on a routed nic', errors.ECODE_INVAL)
9056 if 'mac' in nic_dict:
9057 nic_mac = nic_dict['mac']
9059 raise errors.OpPrereqError('Cannot set the nic mac to None',
9061 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9062 # otherwise generate the mac
9063 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9065 # or validate/reserve the current one
9067 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9068 except errors.ReservationError:
9069 raise errors.OpPrereqError("MAC address %s already in use"
9070 " in cluster" % nic_mac,
9071 errors.ECODE_NOTUNIQUE)
9074 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9075 raise errors.OpPrereqError("Disk operations not supported for"
9076 " diskless instances",
9078 for disk_op, _ in self.op.disks:
9079 if disk_op == constants.DDM_REMOVE:
9080 if len(instance.disks) == 1:
9081 raise errors.OpPrereqError("Cannot remove the last disk of"
9082 " an instance", errors.ECODE_INVAL)
9083 _CheckInstanceDown(self, instance, "cannot remove disks")
9085 if (disk_op == constants.DDM_ADD and
9086 len(instance.disks) >= constants.MAX_DISKS):
9087 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9088 " add more" % constants.MAX_DISKS,
9090 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9092 if disk_op < 0 or disk_op >= len(instance.disks):
9093 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9095 (disk_op, len(instance.disks)),
9100 def _ConvertPlainToDrbd(self, feedback_fn):
9101 """Converts an instance from plain to drbd.
9104 feedback_fn("Converting template to drbd")
9105 instance = self.instance
9106 pnode = instance.primary_node
9107 snode = self.op.remote_node
9109 # create a fake disk info for _GenerateDiskTemplate
9110 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9111 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9112 instance.name, pnode, [snode],
9113 disk_info, None, None, 0)
9114 info = _GetInstanceInfoText(instance)
9115 feedback_fn("Creating aditional volumes...")
9116 # first, create the missing data and meta devices
9117 for disk in new_disks:
9118 # unfortunately this is... not too nice
9119 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9121 for child in disk.children:
9122 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9123 # at this stage, all new LVs have been created, we can rename the old ones
9125 feedback_fn("Renaming original volumes...")
9126 rename_list = [(o, n.children[0].logical_id)
9127 for (o, n) in zip(instance.disks, new_disks)]
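# rename_list pairs each existing plain LV with the logical_id of the data
# child of its new DRBD disk, so the old LV takes over the name expected by
# the new DRBD device.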
9128 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9129 result.Raise("Failed to rename original LVs")
9131 feedback_fn("Initializing DRBD devices...")
9132 # all child devices are in place, we can now create the DRBD devices
9133 for disk in new_disks:
9134 for node in [pnode, snode]:
9135 f_create = node == pnode
9136 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9138 # at this point, the instance has been modified
9139 instance.disk_template = constants.DT_DRBD8
9140 instance.disks = new_disks
9141 self.cfg.Update(instance, feedback_fn)
9143 # disks are created, waiting for sync
9144 disk_abort = not _WaitForSync(self, instance)
9146 raise errors.OpExecError("There are some degraded disks for"
9147 " this instance, please cleanup manually")
9149 def _ConvertDrbdToPlain(self, feedback_fn):
9150 """Converts an instance from drbd to plain.
9153 instance = self.instance
9154 assert len(instance.secondary_nodes) == 1
9155 pnode = instance.primary_node
9156 snode = instance.secondary_nodes[0]
9157 feedback_fn("Converting template to plain")
9159 old_disks = instance.disks
9160 new_disks = [d.children[0] for d in old_disks]
9162 # copy over size and mode
9163 for parent, child in zip(old_disks, new_disks):
9164 child.size = parent.size
9165 child.mode = parent.mode
9167 # update instance structure
9168 instance.disks = new_disks
9169 instance.disk_template = constants.DT_PLAIN
9170 self.cfg.Update(instance, feedback_fn)
9172 feedback_fn("Removing volumes on the secondary node...")
9173 for disk in old_disks:
9174 self.cfg.SetDiskID(disk, snode)
9175 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9177 self.LogWarning("Could not remove block device %s on node %s,"
9178 " continuing anyway: %s", disk.iv_name, snode, msg)
9180 feedback_fn("Removing unneeded volumes on the primary node...")
9181 for idx, disk in enumerate(old_disks):
9182 meta = disk.children[1]
9183 self.cfg.SetDiskID(meta, pnode)
9184 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9186 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9187 " continuing anyway: %s", idx, pnode, msg)
9190 def Exec(self, feedback_fn):
9191 """Modifies an instance.
9193 All parameters take effect only at the next restart of the instance.
9196 # Process here the warnings from CheckPrereq, as we don't have a
9197 # feedback_fn there.
9198 for warn in self.warn:
9199 feedback_fn("WARNING: %s" % warn)
9202 instance = self.instance
9204 for disk_op, disk_dict in self.op.disks:
9205 if disk_op == constants.DDM_REMOVE:
9206 # remove the last disk
9207 device = instance.disks.pop()
9208 device_idx = len(instance.disks)
9209 for node, disk in device.ComputeNodeTree(instance.primary_node):
9210 self.cfg.SetDiskID(disk, node)
9211 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9213 self.LogWarning("Could not remove disk/%d on node %s: %s,"
9214 " continuing anyway", device_idx, node, msg)
9215 result.append(("disk/%d" % device_idx, "remove"))
9216 elif disk_op == constants.DDM_ADD:
9218 if instance.disk_template == constants.DT_FILE:
9219 file_driver, file_path = instance.disks[0].logical_id
9220 file_path = os.path.dirname(file_path)
9222 file_driver = file_path = None
9223 disk_idx_base = len(instance.disks)
9224 new_disk = _GenerateDiskTemplate(self,
9225 instance.disk_template,
9226 instance.name, instance.primary_node,
9227 instance.secondary_nodes,
9232 instance.disks.append(new_disk)
9233 info = _GetInstanceInfoText(instance)
9235 logging.info("Creating volume %s for instance %s",
9236 new_disk.iv_name, instance.name)
9237 # Note: this needs to be kept in sync with _CreateDisks
9239 for node in instance.all_nodes:
9240 f_create = node == instance.primary_node
9242 _CreateBlockDev(self, node, instance, new_disk,
9243 f_create, info, f_create)
9244 except errors.OpExecError, err:
9245 self.LogWarning("Failed to create volume %s (%s) on"
9247 new_disk.iv_name, new_disk, node, err)
9248 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9249 (new_disk.size, new_disk.mode)))
9251 # change a given disk
9252 instance.disks[disk_op].mode = disk_dict['mode']
9253 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9255 if self.op.disk_template:
9256 r_shut = _ShutdownInstanceDisks(self, instance)
9258 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9259 " proceed with disk template conversion")
9260 mode = (instance.disk_template, self.op.disk_template)
9262 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9264 self.cfg.ReleaseDRBDMinors(instance.name)
9266 result.append(("disk_template", self.op.disk_template))
9269 for nic_op, nic_dict in self.op.nics:
9270 if nic_op == constants.DDM_REMOVE:
9271 # remove the last nic
9272 del instance.nics[-1]
9273 result.append(("nic.%d" % len(instance.nics), "remove"))
9274 elif nic_op == constants.DDM_ADD:
9275 # mac and bridge should be set, by now
9276 mac = nic_dict['mac']
9277 ip = nic_dict.get('ip', None)
9278 nicparams = self.nic_pinst[constants.DDM_ADD]
9279 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9280 instance.nics.append(new_nic)
9281 result.append(("nic.%d" % (len(instance.nics) - 1),
9282 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9283 (new_nic.mac, new_nic.ip,
9284 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9285 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9288 for key in 'mac', 'ip':
9290 setattr(instance.nics[nic_op], key, nic_dict[key])
9291 if nic_op in self.nic_pinst:
9292 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9293 for key, val in nic_dict.iteritems():
9294 result.append(("nic.%s/%d" % (key, nic_op), val))
9297 if self.op.hvparams:
9298 instance.hvparams = self.hv_inst
9299 for key, val in self.op.hvparams.iteritems():
9300 result.append(("hv/%s" % key, val))
9303 if self.op.beparams:
9304 instance.beparams = self.be_inst
9305 for key, val in self.op.beparams.iteritems():
9306 result.append(("be/%s" % key, val))
9310 instance.os = self.op.os_name
9313 if self.op.osparams:
9314 instance.osparams = self.os_inst
9315 for key, val in self.op.osparams.iteritems():
9316 result.append(("os/%s" % key, val))
9318 self.cfg.Update(instance, feedback_fn)
9322 _DISK_CONVERSIONS = {
9323 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9324 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9328 class LUQueryExports(NoHooksLU):
9329 """Query the exports list
9333 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9334 ("use_locking", False, _TBool),
9338 def ExpandNames(self):
9339 self.needed_locks = {}
9340 self.share_locks[locking.LEVEL_NODE] = 1
9341 if not self.op.nodes:
9342 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9344 self.needed_locks[locking.LEVEL_NODE] = \
9345 _GetWantedNodes(self, self.op.nodes)
9347 def Exec(self, feedback_fn):
9348 """Compute the list of all the exported system images.
9351 @return: a dictionary with the structure node->(export-list)
9352 where export-list is a list of the instances exported on that node
9356 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9357 rpcresult = self.rpc.call_export_list(self.nodes)
9359 for node in rpcresult:
9360 if rpcresult[node].fail_msg:
9361 result[node] = False
9363 result[node] = rpcresult[node].payload
9368 class LUPrepareExport(NoHooksLU):
9369 """Prepares an instance for an export and returns useful information.
9374 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9378 def ExpandNames(self):
9379 self._ExpandAndLockInstance()
9381 def CheckPrereq(self):
9382 """Check prerequisites.
9385 instance_name = self.op.instance_name
9387 self.instance = self.cfg.GetInstanceInfo(instance_name)
9388 assert self.instance is not None, \
9389 "Cannot retrieve locked instance %s" % self.op.instance_name
9390 _CheckNodeOnline(self, self.instance.primary_node)
9392 self._cds = _GetClusterDomainSecret()
9394 def Exec(self, feedback_fn):
9395 """Prepares an instance for an export.
9398 instance = self.instance
9400 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9401 salt = utils.GenerateSecret(8)
9403 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9404 result = self.rpc.call_x509_cert_create(instance.primary_node,
9405 constants.RIE_CERT_VALIDITY)
9406 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9408 (name, cert_pem) = result.payload
9410 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9414 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9415 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9417 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9423 class LUExportInstance(LogicalUnit):
9424 """Export an instance to an image in the cluster.
9427 HPATH = "instance-export"
9428 HTYPE = constants.HTYPE_INSTANCE
9431 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9432 ("shutdown", True, _TBool),
9434 ("remove_instance", False, _TBool),
9435 ("ignore_remove_failures", False, _TBool),
9436 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9437 ("x509_key_name", None, _TOr(_TList, _TNone)),
9438 ("destination_x509_ca", None, _TMaybeString),
9442 def CheckArguments(self):
9443 """Check the arguments.
9446 self.x509_key_name = self.op.x509_key_name
9447 self.dest_x509_ca_pem = self.op.destination_x509_ca
9449 if self.op.remove_instance and not self.op.shutdown:
9450 raise errors.OpPrereqError("Can not remove instance without shutting it"
9453 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9454 if not self.x509_key_name:
9455 raise errors.OpPrereqError("Missing X509 key name for encryption",
9458 if not self.dest_x509_ca_pem:
9459 raise errors.OpPrereqError("Missing destination X509 CA",
9462 def ExpandNames(self):
9463 self._ExpandAndLockInstance()
9465 # Lock all nodes for local exports
9466 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9467 # FIXME: lock only instance primary and destination node
9469 # Sad but true, for now we have to lock all nodes, as we don't know where
9470 # the previous export might be, and in this LU we search for it and
9471 # remove it from its current node. In the future we could fix this by:
9472 # - making a tasklet to search (share-lock all), then create the
9473 # new one, then one to remove, after
9474 # - removing the removal operation altogether
9475 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9477 def DeclareLocks(self, level):
9478 """Last minute lock declaration."""
9479 # All nodes are locked anyway, so nothing to do here.
9481 def BuildHooksEnv(self):
9484 This will run on the master, primary node and target node.
9488 "EXPORT_MODE": self.op.mode,
9489 "EXPORT_NODE": self.op.target_node,
9490 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9491 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9492 # TODO: Generic function for boolean env variables
9493 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9496 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9498 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9500 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9501 nl.append(self.op.target_node)
9505 def CheckPrereq(self):
9506 """Check prerequisites.
9508 This checks that the instance and node names are valid.
9511 instance_name = self.op.instance_name
9513 self.instance = self.cfg.GetInstanceInfo(instance_name)
9514 assert self.instance is not None, \
9515 "Cannot retrieve locked instance %s" % self.op.instance_name
9516 _CheckNodeOnline(self, self.instance.primary_node)
9518 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9519 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9520 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9521 assert self.dst_node is not None
9523 _CheckNodeOnline(self, self.dst_node.name)
9524 _CheckNodeNotDrained(self, self.dst_node.name)
9527 self.dest_disk_info = None
9528 self.dest_x509_ca = None
9530 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9531 self.dst_node = None
9533 if len(self.op.target_node) != len(self.instance.disks):
9534 raise errors.OpPrereqError(("Received destination information for %s"
9535 " disks, but instance %s has %s disks") %
9536 (len(self.op.target_node), instance_name,
9537 len(self.instance.disks)),
9540 cds = _GetClusterDomainSecret()
9542 # Check X509 key name
9544 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9545 except (TypeError, ValueError), err:
9546 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9548 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9549 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9552 # Load and verify CA
9554 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9555 except OpenSSL.crypto.Error, err:
9556 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9557 (err, ), errors.ECODE_INVAL)
9559 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9560 if errcode is not None:
9561 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9562 (msg, ), errors.ECODE_INVAL)
9564 self.dest_x509_ca = cert
9566 # Verify target information
9568 for idx, disk_data in enumerate(self.op.target_node):
9570 (host, port, magic) = \
9571 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9572 except errors.GenericError, err:
9573 raise errors.OpPrereqError("Target info for disk %s: %s" %
9574 (idx, err), errors.ECODE_INVAL)
9576 disk_info.append((host, port, magic))
9578 assert len(disk_info) == len(self.op.target_node)
9579 self.dest_disk_info = disk_info
9582 raise errors.ProgrammerError("Unhandled export mode %r" %
9585 # instance disk type verification
9586 # TODO: Implement export support for file-based disks
9587 for disk in self.instance.disks:
9588 if disk.dev_type == constants.LD_FILE:
9589 raise errors.OpPrereqError("Export not supported for instances with"
9590 " file-based disks", errors.ECODE_INVAL)
9592 def _CleanupExports(self, feedback_fn):
9593 """Removes exports of current instance from all other nodes.
9595 If an instance in a cluster with nodes A..D was exported to node C, its
9596 exports will be removed from the nodes A, B and D.
9599 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9601 nodelist = self.cfg.GetNodeList()
9602 nodelist.remove(self.dst_node.name)
9604 # on one-node clusters nodelist will be empty after the removal
9605 # if we proceed the backup would be removed because OpQueryExports
9606 # substitutes an empty list with the full cluster node list.
9607 iname = self.instance.name
9609 feedback_fn("Removing old exports for instance %s" % iname)
9610 exportlist = self.rpc.call_export_list(nodelist)
9611 for node in exportlist:
9612 if exportlist[node].fail_msg:
9614 if iname in exportlist[node].payload:
9615 msg = self.rpc.call_export_remove(node, iname).fail_msg
9617 self.LogWarning("Could not remove older export for instance %s"
9618 " on node %s: %s", iname, node, msg)
9620 def Exec(self, feedback_fn):
9621 """Export an instance to an image in the cluster.
9624 assert self.op.mode in constants.EXPORT_MODES
9626 instance = self.instance
9627 src_node = instance.primary_node
9629 if self.op.shutdown:
9630       # shut down the instance, but not the disks
9631 feedback_fn("Shutting down instance %s" % instance.name)
9632 result = self.rpc.call_instance_shutdown(src_node, instance,
9633 self.op.shutdown_timeout)
9634 # TODO: Maybe ignore failures if ignore_remove_failures is set
9635 result.Raise("Could not shutdown instance %s on"
9636 " node %s" % (instance.name, src_node))
9638 # set the disks ID correctly since call_instance_start needs the
9639 # correct drbd minor to create the symlinks
9640 for disk in instance.disks:
9641 self.cfg.SetDiskID(disk, src_node)
9643 activate_disks = (not instance.admin_up)
9646       # Activate the instance disks if we're exporting a stopped instance
9647 feedback_fn("Activating disks for %s" % instance.name)
9648 _StartInstanceDisks(self, instance, None)
9651 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9654 helper.CreateSnapshots()
9656 if (self.op.shutdown and instance.admin_up and
9657 not self.op.remove_instance):
9658 assert not activate_disks
9659 feedback_fn("Starting instance %s" % instance.name)
9660 result = self.rpc.call_instance_start(src_node, instance, None, None)
9661 msg = result.fail_msg
9663 feedback_fn("Failed to start instance: %s" % msg)
9664 _ShutdownInstanceDisks(self, instance)
9665 raise errors.OpExecError("Could not start instance: %s" % msg)
9667 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9668 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9669 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9670 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9671 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9673 (key_name, _, _) = self.x509_key_name
9676 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9679 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9680 key_name, dest_ca_pem,
9685 # Check for backwards compatibility
9686 assert len(dresults) == len(instance.disks)
9687 assert compat.all(isinstance(i, bool) for i in dresults), \
9688 "Not all results are boolean: %r" % dresults
9692 feedback_fn("Deactivating disks for %s" % instance.name)
9693 _ShutdownInstanceDisks(self, instance)
9695 if not (compat.all(dresults) and fin_resu):
9698 failures.append("export finalization")
9699 if not compat.all(dresults):
9700 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9702 failures.append("disk export: disk(s) %s" % fdsk)
9704 raise errors.OpExecError("Export failed, errors in %s" %
9705 utils.CommaJoin(failures))
9707 # At this point, the export was successful, we can cleanup/finish
9709 # Remove instance if requested
9710 if self.op.remove_instance:
9711 feedback_fn("Removing instance %s" % instance.name)
9712 _RemoveInstance(self, feedback_fn, instance,
9713 self.op.ignore_remove_failures)
9715 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9716 self._CleanupExports(feedback_fn)
9718 return fin_resu, dresults
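# Illustrative note (example only, not part of the original module): the
# (fin_resu, dresults) tuple returned above carries the overall finalization
# result plus one boolean per instance disk, e.g. (True, [True, True]) for a
# fully successful export of a two-disk instance.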
9721 class LURemoveExport(NoHooksLU):
9722 """Remove exports related to the named instance.
9730 def ExpandNames(self):
9731 self.needed_locks = {}
9732 # We need all nodes to be locked in order for RemoveExport to work, but we
9733 # don't need to lock the instance itself, as nothing will happen to it (and
9734     # we can also remove exports for a removed instance)
9735 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9737 def Exec(self, feedback_fn):
9738 """Remove any export.
9741 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9742 # If the instance was not found we'll try with the name that was passed in.
9743 # This will only work if it was an FQDN, though.
9745 if not instance_name:
9747 instance_name = self.op.instance_name
9749 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9750 exportlist = self.rpc.call_export_list(locked_nodes)
9752 for node in exportlist:
9753 msg = exportlist[node].fail_msg
9755 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9757 if instance_name in exportlist[node].payload:
9759 result = self.rpc.call_export_remove(node, instance_name)
9760 msg = result.fail_msg
9762 logging.error("Could not remove export for instance %s"
9763 " on node %s: %s", instance_name, node, msg)
9765 if fqdn_warn and not found:
9766 feedback_fn("Export not found. If trying to remove an export belonging"
9767 " to a deleted instance please use its Fully Qualified"
9771 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9774   This is an abstract class which is the parent of all the other tag LUs.
9778 def ExpandNames(self):
9779 self.needed_locks = {}
9780 if self.op.kind == constants.TAG_NODE:
9781 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9782 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9783 elif self.op.kind == constants.TAG_INSTANCE:
9784 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9785 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9787 def CheckPrereq(self):
9788 """Check prerequisites.
9791 if self.op.kind == constants.TAG_CLUSTER:
9792 self.target = self.cfg.GetClusterInfo()
9793 elif self.op.kind == constants.TAG_NODE:
9794 self.target = self.cfg.GetNodeInfo(self.op.name)
9795 elif self.op.kind == constants.TAG_INSTANCE:
9796 self.target = self.cfg.GetInstanceInfo(self.op.name)
9798 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9799 str(self.op.kind), errors.ECODE_INVAL)
9802 class LUGetTags(TagsLU):
9803 """Returns the tags of a given object.
9807 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9808 # Name is only meaningful for nodes and instances
9809 ("name", _NoDefault, _TMaybeString),
9813 def Exec(self, feedback_fn):
9814 """Returns the tag list.
9817 return list(self.target.GetTags())
9820 class LUSearchTags(NoHooksLU):
9821 """Searches the tags for a given pattern.
9825 ("pattern", _NoDefault, _TNonEmptyString),
9829 def ExpandNames(self):
9830 self.needed_locks = {}
9832 def CheckPrereq(self):
9833 """Check prerequisites.
9835 This checks the pattern passed for validity by compiling it.
9839 self.re = re.compile(self.op.pattern)
9840 except re.error, err:
9841 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9842 (self.op.pattern, err), errors.ECODE_INVAL)
9844 def Exec(self, feedback_fn):
9845 """Returns the tag list.
9849 tgts = [("/cluster", cfg.GetClusterInfo())]
9850 ilist = cfg.GetAllInstancesInfo().values()
9851 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9852 nlist = cfg.GetAllNodesInfo().values()
9853 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9855 for path, target in tgts:
9856 for tag in target.GetTags():
9857 if self.re.search(tag):
9858 results.append((path, tag))
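# Illustrative sketch (example only, not part of the original module): the same
# kind of regex-based tag matching that LUSearchTags.Exec performs above, shown
# on plain data to make clear that the pattern is a full regular expression
# matched with search(). The helper name and its arguments are hypothetical.
def _ExampleSearchTags(pattern, tags_by_path):
  """Example only: tags_by_path maps an object path to an iterable of tags."""
  rex = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tags_by_path.items()
          for tag in tags
          if rex.search(tag)]

# e.g. _ExampleSearchTags("^db", {"/instances/i1": ["db-master", "public-db"]})
# only returns [("/instances/i1", "db-master")].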
9862 class LUAddTags(TagsLU):
9863 """Sets a tag on a given object.
9867 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9868 # Name is only meaningful for nodes and instances
9869 ("name", _NoDefault, _TMaybeString),
9870 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9874 def CheckPrereq(self):
9875 """Check prerequisites.
9877 This checks the type and length of the tag name and value.
9880 TagsLU.CheckPrereq(self)
9881 for tag in self.op.tags:
9882 objects.TaggableObject.ValidateTag(tag)
9884 def Exec(self, feedback_fn):
9889 for tag in self.op.tags:
9890 self.target.AddTag(tag)
9891 except errors.TagError, err:
9892 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9893 self.cfg.Update(self.target, feedback_fn)
9896 class LUDelTags(TagsLU):
9897 """Delete a list of tags from a given object.
9901 ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
9902 # Name is only meaningful for nodes and instances
9903 ("name", _NoDefault, _TMaybeString),
9904 ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
9908 def CheckPrereq(self):
9909 """Check prerequisites.
9911 This checks that we have the given tag.
9914 TagsLU.CheckPrereq(self)
9915 for tag in self.op.tags:
9916 objects.TaggableObject.ValidateTag(tag)
9917 del_tags = frozenset(self.op.tags)
9918 cur_tags = self.target.GetTags()
9919 if not del_tags <= cur_tags:
9920 diff_tags = del_tags - cur_tags
9921 diff_names = ["'%s'" % tag for tag in diff_tags]
9923 raise errors.OpPrereqError("Tag(s) %s not found" %
9924 (",".join(diff_names)), errors.ECODE_NOENT)
9926 def Exec(self, feedback_fn):
9927 """Remove the tag from the object.
9930 for tag in self.op.tags:
9931 self.target.RemoveTag(tag)
9932 self.cfg.Update(self.target, feedback_fn)
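# Illustrative note (example only, not part of the original module): because of
# the subset check in LUDelTags.CheckPrereq, the whole request is rejected if
# any requested tag is absent; e.g. deleting ["a", "b"] when only "a" is set
# fails with "Tag(s) 'b' not found" instead of silently removing "a".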
9935 class LUTestDelay(NoHooksLU):
9936 """Sleep for a specified amount of time.
9938   This LU sleeps on the master and/or nodes for a specified amount of time.
9943 ("duration", _NoDefault, _TFloat),
9944 ("on_master", True, _TBool),
9945 ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9946 ("repeat", 0, _TPositiveInt)
9950 def ExpandNames(self):
9951 """Expand names and set required locks.
9953 This expands the node list, if any.
9956 self.needed_locks = {}
9957 if self.op.on_nodes:
9958       # _GetWantedNodes can be used here, but is not always appropriate to use
9959       # this way in ExpandNames; check the LogicalUnit.ExpandNames docstring for details.
9961 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9962 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9964 def _TestDelay(self):
9965 """Do the actual sleep.
9968 if self.op.on_master:
9969 if not utils.TestDelay(self.op.duration):
9970 raise errors.OpExecError("Error during master delay test")
9971 if self.op.on_nodes:
9972 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9973 for node, node_result in result.items():
9974 node_result.Raise("Failure during rpc call to node %s" % node)
9976 def Exec(self, feedback_fn):
9977 """Execute the test delay opcode, with the wanted repetitions.
9980 if self.op.repeat == 0:
9983 top_value = self.op.repeat - 1
9984 for i in range(self.op.repeat):
9985 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9989 class LUTestJobqueue(NoHooksLU):
9990 """Utility LU to test some aspects of the job queue.
9994 ("notify_waitlock", False, _TBool),
9995 ("notify_exec", False, _TBool),
9996 ("log_messages", _EmptyList, _TListOf(_TString)),
9997 ("fail", False, _TBool),
10001 # Must be lower than default timeout for WaitForJobChange to see whether it
10002 # notices changed jobs
10003 _CLIENT_CONNECT_TIMEOUT = 20.0
10004 _CLIENT_CONFIRM_TIMEOUT = 60.0
10007 def _NotifyUsingSocket(cls, cb, errcls):
10008 """Opens a Unix socket and waits for another program to connect.
10011 @param cb: Callback to send socket name to client
10012 @type errcls: class
10013 @param errcls: Exception class to use for errors
10016 # Using a temporary directory as there's no easy way to create temporary
10017 # sockets without writing a custom loop around tempfile.mktemp and
10019 tmpdir = tempfile.mkdtemp()
10021 tmpsock = utils.PathJoin(tmpdir, "sock")
10023 logging.debug("Creating temporary socket at %s", tmpsock)
10024 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10029 # Send details to client
10032 # Wait for client to connect before continuing
10033 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10035 (conn, _) = sock.accept()
10036 except socket.error, err:
10037 raise errcls("Client didn't connect in time (%s)" % err)
10041 # Remove as soon as client is connected
10042 shutil.rmtree(tmpdir)
10044 # Wait for client to close
10047 # pylint: disable-msg=E1101
10048 # Instance of '_socketobject' has no ... member
10049 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10051 except socket.error, err:
10052 raise errcls("Client failed to confirm notification (%s)" % err)
10056 def _SendNotification(self, test, arg, sockname):
10057 """Sends a notification to the client.
10060 @param test: Test name
10061 @param arg: Test argument (depends on test)
10062 @type sockname: string
10063 @param sockname: Socket path
10066 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10068 def _Notify(self, prereq, test, arg):
10069 """Notifies the client of a test.
10072 @param prereq: Whether this is a prereq-phase test
10074 @param test: Test name
10075 @param arg: Test argument (depends on test)
10079 errcls = errors.OpPrereqError
10081 errcls = errors.OpExecError
10083 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10087 def CheckArguments(self):
10088 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10089 self.expandnames_calls = 0
10091 def ExpandNames(self):
10092 checkargs_calls = getattr(self, "checkargs_calls", 0)
10093 if checkargs_calls < 1:
10094 raise errors.ProgrammerError("CheckArguments was not called")
10096 self.expandnames_calls += 1
10098 if self.op.notify_waitlock:
10099 self._Notify(True, constants.JQT_EXPANDNAMES, None)
10101 self.LogInfo("Expanding names")
10103 # Get lock on master node (just to get a lock, not for a particular reason)
10104 self.needed_locks = {
10105 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10108 def Exec(self, feedback_fn):
10109 if self.expandnames_calls < 1:
10110 raise errors.ProgrammerError("ExpandNames was not called")
10112 if self.op.notify_exec:
10113 self._Notify(False, constants.JQT_EXEC, None)
10115 self.LogInfo("Executing")
10117 if self.op.log_messages:
10118 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10119 for idx, msg in enumerate(self.op.log_messages):
10120 self.LogInfo("Sending log message %s", idx + 1)
10121 feedback_fn(constants.JQT_MSGPREFIX + msg)
10122 # Report how many test messages have been sent
10123 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10126 raise errors.OpExecError("Opcode failure was requested")
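# Illustrative sketch (example only, not part of the original module): what a
# test client on the receiving end of _SendNotification is expected to do with
# the socket path it receives -- connect to the Unix socket, which unblocks the
# accept() in _NotifyUsingSocket, and close the connection once it has seen the
# corresponding job change. The function name and the bare connect/close
# handshake are assumptions for illustration only.
def _ExampleJobqueueTestClient(sockname):
  """Example only: connects to the notification socket and disconnects."""
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    sock.connect(sockname)
  finally:
    sock.close()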
10131 class IAllocator(object):
10132 """IAllocator framework.
10134   An IAllocator instance has four sets of attributes:
10135 - cfg that is needed to query the cluster
10136 - input data (all members of the _KEYS class attribute are required)
10137 - four buffer attributes (in|out_data|text), that represent the
10138 input (to the external script) in text and data structure format,
10139 and the output from it, again in two formats
10140     - the result variables from the script (success, info, nodes) for easy usage
10144 # pylint: disable-msg=R0902
10145 # lots of instance attributes
10147 "name", "mem_size", "disks", "disk_template",
10148 "os", "tags", "nics", "vcpus", "hypervisor",
10151 "name", "relocate_from",
10157 def __init__(self, cfg, rpc, mode, **kwargs):
10160 # init buffer variables
10161 self.in_text = self.out_text = self.in_data = self.out_data = None
10162 # init all input fields so that pylint is happy
10164 self.mem_size = self.disks = self.disk_template = None
10165 self.os = self.tags = self.nics = self.vcpus = None
10166 self.hypervisor = None
10167 self.relocate_from = None
10169 self.evac_nodes = None
10171 self.required_nodes = None
10172 # init result fields
10173 self.success = self.info = self.result = None
10174 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10175 keyset = self._ALLO_KEYS
10176 fn = self._AddNewInstance
10177 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10178 keyset = self._RELO_KEYS
10179 fn = self._AddRelocateInstance
10180 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10181 keyset = self._EVAC_KEYS
10182 fn = self._AddEvacuateNodes
10184 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10185 " IAllocator" % self.mode)
10187 if key not in keyset:
10188 raise errors.ProgrammerError("Invalid input parameter '%s' to"
10189 " IAllocator" % key)
10190 setattr(self, key, kwargs[key])
10193 if key not in kwargs:
10194 raise errors.ProgrammerError("Missing input parameter '%s' to"
10195 " IAllocator" % key)
10196 self._BuildInputData(fn)
10198 def _ComputeClusterData(self):
10199 """Compute the generic allocator input data.
10201 This is the data that is independent of the actual operation.
10205 cluster_info = cfg.GetClusterInfo()
10208 "version": constants.IALLOCATOR_VERSION,
10209 "cluster_name": cfg.GetClusterName(),
10210 "cluster_tags": list(cluster_info.GetTags()),
10211 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10212 # we don't have job IDs
10214 iinfo = cfg.GetAllInstancesInfo().values()
10215 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10219 node_list = cfg.GetNodeList()
10221 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10222 hypervisor_name = self.hypervisor
10223 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10224 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10225 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10226 hypervisor_name = cluster_info.enabled_hypervisors[0]
10228 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10231 self.rpc.call_all_instances_info(node_list,
10232 cluster_info.enabled_hypervisors)
10233 for nname, nresult in node_data.items():
10234 # first fill in static (config-based) values
10235 ninfo = cfg.GetNodeInfo(nname)
10237 "tags": list(ninfo.GetTags()),
10238 "primary_ip": ninfo.primary_ip,
10239 "secondary_ip": ninfo.secondary_ip,
10240 "offline": ninfo.offline,
10241 "drained": ninfo.drained,
10242 "master_candidate": ninfo.master_candidate,
10245 if not (ninfo.offline or ninfo.drained):
10246 nresult.Raise("Can't get data for node %s" % nname)
10247 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10249 remote_info = nresult.payload
10251 for attr in ['memory_total', 'memory_free', 'memory_dom0',
10252 'vg_size', 'vg_free', 'cpu_total']:
10253 if attr not in remote_info:
10254 raise errors.OpExecError("Node '%s' didn't return attribute"
10255 " '%s'" % (nname, attr))
10256 if not isinstance(remote_info[attr], int):
10257 raise errors.OpExecError("Node '%s' returned invalid value"
10259 (nname, attr, remote_info[attr]))
10260 # compute memory used by primary instances
10261 i_p_mem = i_p_up_mem = 0
10262 for iinfo, beinfo in i_list:
10263 if iinfo.primary_node == nname:
10264 i_p_mem += beinfo[constants.BE_MEMORY]
10265 if iinfo.name not in node_iinfo[nname].payload:
10268 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10269 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10270 remote_info['memory_free'] -= max(0, i_mem_diff)
10273 i_p_up_mem += beinfo[constants.BE_MEMORY]
10275 # compute memory used by instances
10277 "total_memory": remote_info['memory_total'],
10278 "reserved_memory": remote_info['memory_dom0'],
10279 "free_memory": remote_info['memory_free'],
10280 "total_disk": remote_info['vg_size'],
10281 "free_disk": remote_info['vg_free'],
10282 "total_cpus": remote_info['cpu_total'],
10283 "i_pri_memory": i_p_mem,
10284 "i_pri_up_memory": i_p_up_mem,
10286 pnr.update(pnr_dyn)
10288 node_results[nname] = pnr
10289 data["nodes"] = node_results
10293 for iinfo, beinfo in i_list:
10295 for nic in iinfo.nics:
10296 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10297 nic_dict = {"mac": nic.mac,
10299 "mode": filled_params[constants.NIC_MODE],
10300 "link": filled_params[constants.NIC_LINK],
10302 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10303 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10304 nic_data.append(nic_dict)
10306 "tags": list(iinfo.GetTags()),
10307 "admin_up": iinfo.admin_up,
10308 "vcpus": beinfo[constants.BE_VCPUS],
10309 "memory": beinfo[constants.BE_MEMORY],
10311 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10313 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10314 "disk_template": iinfo.disk_template,
10315 "hypervisor": iinfo.hypervisor,
10317 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10319 instance_data[iinfo.name] = pir
10321 data["instances"] = instance_data
10323 self.in_data = data
10325 def _AddNewInstance(self):
10326 """Add new instance data to allocator structure.
10328     This in combination with _ComputeClusterData will create the
10329 correct structure needed as input for the allocator.
10331     The checks for the completeness of the opcode must have already been done.
10335 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10337 if self.disk_template in constants.DTS_NET_MIRROR:
10338 self.required_nodes = 2
10340 self.required_nodes = 1
10343 "disk_template": self.disk_template,
10346 "vcpus": self.vcpus,
10347 "memory": self.mem_size,
10348 "disks": self.disks,
10349 "disk_space_total": disk_space,
10351 "required_nodes": self.required_nodes,
10355 def _AddRelocateInstance(self):
10356 """Add relocate instance data to allocator structure.
10358     This in combination with _ComputeClusterData will create the
10359 correct structure needed as input for the allocator.
10361     The checks for the completeness of the opcode must have already been done.
10365 instance = self.cfg.GetInstanceInfo(self.name)
10366 if instance is None:
10367 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10368 " IAllocator" % self.name)
10370 if instance.disk_template not in constants.DTS_NET_MIRROR:
10371 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10372 errors.ECODE_INVAL)
10374 if len(instance.secondary_nodes) != 1:
10375 raise errors.OpPrereqError("Instance has not exactly one secondary node",
10376 errors.ECODE_STATE)
10378 self.required_nodes = 1
10379 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10380 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10384 "disk_space_total": disk_space,
10385 "required_nodes": self.required_nodes,
10386 "relocate_from": self.relocate_from,
10390 def _AddEvacuateNodes(self):
10391 """Add evacuate nodes data to allocator structure.
10395 "evac_nodes": self.evac_nodes
10399 def _BuildInputData(self, fn):
10400 """Build input data structures.
10403 self._ComputeClusterData()
10406 request["type"] = self.mode
10407 self.in_data["request"] = request
10409 self.in_text = serializer.Dump(self.in_data)
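  # Illustrative note (example only, not part of the original module): after
  # _BuildInputData runs, self.in_text holds the serialized form of
  # self.in_data, i.e. the cluster data gathered by _ComputeClusterData plus a
  # mode-specific "request" section, roughly:
  #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
  #    "enabled_hypervisors": [...], "nodes": {...}, "instances": {...},
  #    "request": {"type": <mode>, ...}}
  # The exact request keys depend on which _Add*() method built it.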
10411 def Run(self, name, validate=True, call_fn=None):
10412 """Run an instance allocator and return the results.
10415 if call_fn is None:
10416 call_fn = self.rpc.call_iallocator_runner
10418 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10419 result.Raise("Failure while running the iallocator script")
10421 self.out_text = result.payload
10423 self._ValidateResult()
10425 def _ValidateResult(self):
10426 """Process the allocator results.
10428     This will process and, if successful, save the result in
10429 self.out_data and the other parameters.
10433 rdict = serializer.Load(self.out_text)
10434 except Exception, err:
10435 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10437 if not isinstance(rdict, dict):
10438 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10440     # TODO: remove backwards compatibility in later versions
10441 if "nodes" in rdict and "result" not in rdict:
10442 rdict["result"] = rdict["nodes"]
10445 for key in "success", "info", "result":
10446 if key not in rdict:
10447 raise errors.OpExecError("Can't parse iallocator results:"
10448 " missing key '%s'" % key)
10449 setattr(self, key, rdict[key])
10451 if not isinstance(rdict["result"], list):
10452 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10454 self.out_data = rdict
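# Illustrative sketch (example only, not part of the original module): a
# minimal reply that would pass _ValidateResult above -- it must deserialize to
# a dict carrying the "success", "info" and "result" keys, with "result" being
# a list ("nodes" is still accepted as a legacy alias for "result"). The
# function name and node names are placeholders.
def _ExampleIAllocatorReply():
  """Example only: builds a serialized reply in the format checked above."""
  return serializer.Dump({
    "success": True,
    "info": "example reply",
    "result": ["node1.example.com", "node2.example.com"],
    })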
10457 class LUTestAllocator(NoHooksLU):
10458 """Run allocator tests.
10460 This LU runs the allocator tests
10464 ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10465 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10466 ("name", _NoDefault, _TNonEmptyString),
10467 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10468 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10469 _TOr(_TNone, _TNonEmptyString))))),
10470 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10471 ("hypervisor", None, _TMaybeString),
10472 ("allocator", None, _TMaybeString),
10473 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10474 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10475 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10476 ("os", None, _TMaybeString),
10477 ("disk_template", None, _TMaybeString),
10478 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10481 def CheckPrereq(self):
10482 """Check prerequisites.
10484     This checks the opcode parameters depending on the direction and mode of the test.
10487 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10488 for attr in ["mem_size", "disks", "disk_template",
10489 "os", "tags", "nics", "vcpus"]:
10490 if not hasattr(self.op, attr):
10491 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10492 attr, errors.ECODE_INVAL)
10493 iname = self.cfg.ExpandInstanceName(self.op.name)
10494 if iname is not None:
10495 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10496 iname, errors.ECODE_EXISTS)
10497 if not isinstance(self.op.nics, list):
10498 raise errors.OpPrereqError("Invalid parameter 'nics'",
10499 errors.ECODE_INVAL)
10500 if not isinstance(self.op.disks, list):
10501 raise errors.OpPrereqError("Invalid parameter 'disks'",
10502 errors.ECODE_INVAL)
10503 for row in self.op.disks:
10504 if (not isinstance(row, dict) or
10505 "size" not in row or
10506 not isinstance(row["size"], int) or
10507 "mode" not in row or
10508 row["mode"] not in ['r', 'w']):
10509 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10510 " parameter", errors.ECODE_INVAL)
10511 if self.op.hypervisor is None:
10512 self.op.hypervisor = self.cfg.GetHypervisorType()
10513 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10514 fname = _ExpandInstanceName(self.cfg, self.op.name)
10515 self.op.name = fname
10516 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10517 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10518 if not hasattr(self.op, "evac_nodes"):
10519 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10520 " opcode input", errors.ECODE_INVAL)
10522 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10523 self.op.mode, errors.ECODE_INVAL)
10525 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10526 if self.op.allocator is None:
10527 raise errors.OpPrereqError("Missing allocator name",
10528 errors.ECODE_INVAL)
10529 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10530 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10531 self.op.direction, errors.ECODE_INVAL)
10533 def Exec(self, feedback_fn):
10534 """Run the allocator test.
10537 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10538 ial = IAllocator(self.cfg, self.rpc,
10541 mem_size=self.op.mem_size,
10542 disks=self.op.disks,
10543 disk_template=self.op.disk_template,
10547 vcpus=self.op.vcpus,
10548 hypervisor=self.op.hypervisor,
10550 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10551 ial = IAllocator(self.cfg, self.rpc,
10554 relocate_from=list(self.relocate_from),
10556 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10557 ial = IAllocator(self.cfg, self.rpc,
10559 evac_nodes=self.op.evac_nodes)
10561 raise errors.ProgrammerError("Uncatched mode %s in"
10562 " LUTestAllocator.Exec", self.op.mode)
10564 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10565 result = ial.in_text
10567 ial.Run(self.op.allocator, validate=False)
10568 result = ial.out_text