4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
40 from ganeti import ssh
41 from ganeti import utils
42 from ganeti import errors
43 from ganeti import hypervisor
44 from ganeti import locking
45 from ganeti import constants
46 from ganeti import objects
47 from ganeti import serializer
48 from ganeti import ssconf
49 from ganeti import uidpool
50 from ganeti import compat
51 from ganeti import masterd
53 import ganeti.masterd.instance # pylint: disable-msg=W0611
56 # Modifiable default values; need to define these here before the
60 """Returns an empty list.
67 """Returns an empty dict.
73 #: The without-default default value
77 #: The no-type (value too complex to check in the type system)
83 """Checks if the given value is not None.
86 return val is not None
90 """Checks if the given value is None.
97 """Checks if the given value is a boolean.
100 return isinstance(val, bool)
104 """Checks if the given value is an integer.
107 return isinstance(val, int)
111 """Checks if the given value is a float.
114 return isinstance(val, float)
118 """Checks if the given value is a string.
121 return isinstance(val, basestring)
125 """Checks if a given value evaluates to a boolean True value.
131 def _TElemOf(target_list):
132 """Builds a function that checks if a given value is a member of a list.
135 return lambda val: val in target_list
140 """Checks if the given value is a list.
143 return isinstance(val, list)
147 """Checks if the given value is a dictionary.
150 return isinstance(val, dict)
155 """Combine multiple functions using an AND operation.
159 return compat.all(t(val) for t in args)
164 """Combine multiple functions using an AND operation.
168 return compat.any(t(val) for t in args)
174 #: a non-empty string
175 _TNonEmptyString = _TAnd(_TString, _TTrue)
178 #: a maybe non-empty string
179 _TMaybeString = _TOr(_TNonEmptyString, _TNone)
182 #: a maybe boolean (bool or none)
183 _TMaybeBool = _TOr(_TBool, _TNone)
186 #: a positive integer
187 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
189 #: a strictly positive integer
190 _TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0)
193 def _TListOf(my_type):
194 """Checks if a given value is a list with all elements of the same type.
198 lambda lst: compat.all(my_type(v) for v in lst))
201 def _TDictOf(key_type, val_type):
202 """Checks a dict type for the type of its key/values.
206 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
207 and compat.all(val_type(v)
208 for v in my_dict.values())))
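# Illustrative sketch (not part of the original module): the simple checkers
# above compose into more specific validators. For instance, a dict mapping
# non-empty strings to lists of non-negative integers could be described as:
#
#   _TExampleMap = _TDictOf(_TNonEmptyString, _TListOf(_TPositiveInt))
#   _TExampleMap({"nodes": [1, 2, 3]})   # -> True
#   _TExampleMap({"nodes": [1, -2]})     # -> False (negative element)
#
# The _TExampleMap name is hypothetical and used only for illustration.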
211 # Common opcode attributes
213 #: output fields for a query operation
214 _POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString))
217 #: the shutdown timeout
218 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
221 #: the force parameter
222 _PForce = ("force", False, _TBool)
224 #: a required instance name (for single-instance LUs)
225 _PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString)
228 #: a required node name (for single-node LUs)
229 _PNodeName = ("node_name", _NoDefault, _TNonEmptyString)
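# Illustrative sketch (LUExampleQuery is a hypothetical class, not defined in
# this module): an LU declares its opcode parameters as (name, default, check)
# tuples in _OP_PARAMS, reusing the common attributes above where possible:
#
#   class LUExampleQuery(NoHooksLU):
#     _OP_PARAMS = [
#       _POutputFields,
#       ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
#       ("use_locking", False, _TBool),
#       ]
#
# Missing attributes get the listed default (or raise OpPrereqError when the
# default is _NoDefault), and each value must satisfy its check callable, as
# done in LogicalUnit.__init__ below.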
233 class LogicalUnit(object):
234 """Logical Unit base class.
236 Subclasses must follow these rules:
237 - implement ExpandNames
238 - implement CheckPrereq (except when tasklets are used)
239 - implement Exec (except when tasklets are used)
240 - implement BuildHooksEnv
241 - redefine HPATH and HTYPE
242 - optionally redefine their run requirements:
243 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
245 Note that all commands require root permissions.
247 @ivar dry_run_result: the value (if any) that will be returned to the caller
248 in dry-run mode (signalled by opcode dry_run parameter)
249 @cvar _OP_PARAMS: a list of opcode attributes, their default values
250 they should get if not already defined, and types they must match
258 def __init__(self, processor, op, context, rpc):
259 """Constructor for LogicalUnit.
261 This needs to be overridden in derived classes in order to check op
265 self.proc = processor
267 self.cfg = context.cfg
268 self.context = context
270 # Dicts used to declare locking needs to mcpu
271 self.needed_locks = None
272 self.acquired_locks = {}
273 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
275 self.remove_locks = {}
276 # Used to force good behavior when calling helper functions
277 self.recalculate_locks = {}
280 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
281 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
282 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
283 # support for dry-run
284 self.dry_run_result = None
285 # support for generic debug attribute
286 if (not hasattr(self.op, "debug_level") or
287 not isinstance(self.op.debug_level, int)):
288 self.op.debug_level = 0
293 # The new kind-of-type-system
294 op_id = self.op.OP_ID
295 for attr_name, aval, test in self._OP_PARAMS:
296 if not hasattr(op, attr_name):
297 if aval == _NoDefault:
298 raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
299 (op_id, attr_name), errors.ECODE_INVAL)
305 setattr(self.op, attr_name, dval)
306 attr_val = getattr(op, attr_name)
310 if not callable(test):
311 raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
312 " given type is not a proper type (%s)" %
313 (op_id, attr_name, test))
314 if not test(attr_val):
315 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
316 self.op.OP_ID, attr_name, type(attr_val), attr_val)
317 raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
318 (op_id, attr_name), errors.ECODE_INVAL)
320 self.CheckArguments()
323 """Returns the SshRunner object
327 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
330 ssh = property(fget=__GetSSH)
332 def CheckArguments(self):
333 """Check syntactic validity for the opcode arguments.
335 This method is for doing a simple syntactic check and ensuring the
336 validity of opcode parameters, without any cluster-related
337 checks. While the same can be accomplished in ExpandNames and/or
338 CheckPrereq, doing these separately is better because:
340 - ExpandNames is left as purely a lock-related function
341 - CheckPrereq is run after we have acquired locks (and possible
344 The function is allowed to change the self.op attribute so that
345 later methods can no longer worry about missing parameters.
350 def ExpandNames(self):
351 """Expand names for this LU.
353 This method is called before starting to execute the opcode, and it should
354 update all the parameters of the opcode to their canonical form (e.g. a
355 short node name must be fully expanded after this method has successfully
356 completed). This way locking, hooks, logging, etc. can work correctly.
358 LUs which implement this method must also populate the self.needed_locks
359 member, as a dict with lock levels as keys, and a list of needed lock names
362 - use an empty dict if you don't need any lock
363 - if you don't need any lock at a particular level omit that level
364 - don't put anything for the BGL level
365 - if you want all locks at a level use locking.ALL_SET as a value
367 If you need to share locks (rather than acquire them exclusively) at one
368 level you can modify self.share_locks, setting a true value (usually 1) for
369 that level. By default locks are not shared.
371 This function can also define a list of tasklets, which then will be
372 executed in order instead of the usual LU-level CheckPrereq and Exec
373 functions, if those are not defined by the LU.
377 # Acquire all nodes and one instance
378 self.needed_locks = {
379 locking.LEVEL_NODE: locking.ALL_SET,
380 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
382 # Acquire just two nodes
383 self.needed_locks = {
384 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
387 self.needed_locks = {} # No, you can't leave it to the default value None
390 # The implementation of this method is mandatory only if the new LU is
391 # concurrent, so that old LUs don't need to be changed all at the same
394 self.needed_locks = {} # Exclusive LUs don't need locks.
396 raise NotImplementedError
398 def DeclareLocks(self, level):
399 """Declare LU locking needs for a level
401 While most LUs can just declare their locking needs at ExpandNames time,
402 sometimes there's the need to calculate some locks after having acquired
403 the ones before. This function is called just before acquiring locks at a
404 particular level, but after acquiring the ones at lower levels, and permits
405 such calculations. It can be used to modify self.needed_locks, and by
406 default it does nothing.
408 This function is only called if you have something already set in
409 self.needed_locks for the level.
411 @param level: Locking level which is going to be locked
412 @type level: member of ganeti.locking.LEVELS
416 def CheckPrereq(self):
417 """Check prerequisites for this LU.
419 This method should check that the prerequisites for the execution
420 of this LU are fulfilled. It can do internode communication, but
421 it should be idempotent - no cluster or system changes are
424 The method should raise errors.OpPrereqError in case something is
425 not fulfilled. Its return value is ignored.
427 This method should also update all the parameters of the opcode to
428 their canonical form if it hasn't been done by ExpandNames before.
431 if self.tasklets is not None:
432 for (idx, tl) in enumerate(self.tasklets):
433 logging.debug("Checking prerequisites for tasklet %s/%s",
434 idx + 1, len(self.tasklets))
439 def Exec(self, feedback_fn):
442 This method should implement the actual work. It should raise
443 errors.OpExecError for failures that are somewhat dealt with in
447 if self.tasklets is not None:
448 for (idx, tl) in enumerate(self.tasklets):
449 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
452 raise NotImplementedError
454 def BuildHooksEnv(self):
455 """Build hooks environment for this LU.
457 This method should return a three-element tuple consisting of: a dict
458 containing the environment that will be used for running the
459 specific hook for this LU, a list of node names on which the hook
460 should run before the execution, and a list of node names on which
461 the hook should run after the execution.
463 The keys of the dict must not have the 'GANETI_' prefix, as this will
464 be handled in the hooks runner. Also note that additional keys will be
465 added by the hooks runner. If the LU doesn't define any
466 environment, an empty dict (and not None) should be returned.
468 If the hook should run on no nodes, return an empty list (and not None).
470 Note that if the HPATH for a LU class is None, this function will
474 raise NotImplementedError
476 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
477 """Notify the LU about the results of its hooks.
479 This method is called every time a hooks phase is executed, and notifies
480 the Logical Unit about the hooks' result. The LU can then use it to alter
481 its result based on the hooks. By default the method does nothing and the
482 previous result is passed back unchanged but any LU can define it if it
483 wants to use the local cluster hook-scripts somehow.
485 @param phase: one of L{constants.HOOKS_PHASE_POST} or
486 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
487 @param hook_results: the results of the multi-node hooks rpc call
488 @param feedback_fn: function used to send feedback back to the caller
489 @param lu_result: the previous Exec result this LU had, or None
491 @return: the new Exec result, based on the previous result
495 # API must be kept, thus we ignore the unused-argument and
496 # could-be-a-function warnings
497 # pylint: disable-msg=W0613,R0201
500 def _ExpandAndLockInstance(self):
501 """Helper function to expand and lock an instance.
503 Many LUs that work on an instance take its name in self.op.instance_name
504 and need to expand it and then declare the expanded name for locking. This
505 function does it, and then updates self.op.instance_name to the expanded
506 name. It also initializes needed_locks as a dict, if this hasn't been done
510 if self.needed_locks is None:
511 self.needed_locks = {}
513 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
514 "_ExpandAndLockInstance called with instance-level locks set"
515 self.op.instance_name = _ExpandInstanceName(self.cfg,
516 self.op.instance_name)
517 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
519 def _LockInstancesNodes(self, primary_only=False):
520 """Helper function to declare instances' nodes for locking.
522 This function should be called after locking one or more instances to lock
523 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
524 with all primary or secondary nodes for instances already locked and
525 present in self.needed_locks[locking.LEVEL_INSTANCE].
527 It should be called from DeclareLocks, and for safety only works if
528 self.recalculate_locks[locking.LEVEL_NODE] is set.
530 In the future it may grow parameters to just lock some instances' nodes, or
531 to just lock primary or secondary nodes, if needed.
533 It should be called in DeclareLocks in a way similar to::
535 if level == locking.LEVEL_NODE:
536 self._LockInstancesNodes()
538 @type primary_only: boolean
539 @param primary_only: only lock primary nodes of locked instances
542 assert locking.LEVEL_NODE in self.recalculate_locks, \
543 "_LockInstancesNodes helper function called with no nodes to recalculate"
545 # TODO: check if we've really been called with the instance locks held
547 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
548 # future we might want to have different behaviors depending on the value
549 # of self.recalculate_locks[locking.LEVEL_NODE]
551 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
552 instance = self.context.cfg.GetInstanceInfo(instance_name)
553 wanted_nodes.append(instance.primary_node)
555 wanted_nodes.extend(instance.secondary_nodes)
557 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
558 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
559 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
560 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
562 del self.recalculate_locks[locking.LEVEL_NODE]
565 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
566 """Simple LU which runs no hooks.
568 This LU is intended as a parent for other LogicalUnits which will
569 run no hooks, in order to reduce duplicate code.
575 def BuildHooksEnv(self):
576 """Empty BuildHooksEnv for NoHooksLu.
578 This just raises an error.
581 assert False, "BuildHooksEnv called for NoHooksLUs"
585 """Tasklet base class.
587 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
588 they can mix legacy code with tasklets. Locking needs to be done in the LU,
589 tasklets know nothing about locks.
591 Subclasses must follow these rules:
592 - Implement CheckPrereq
596 def __init__(self, lu):
603 def CheckPrereq(self):
604 """Check prerequisites for this tasklets.
606 This method should check whether the prerequisites for the execution of
607 this tasklet are fulfilled. It can do internode communication, but it
608 should be idempotent - no cluster or system changes are allowed.
610 The method should raise errors.OpPrereqError in case something is not
611 fulfilled. Its return value is ignored.
613 This method should also update all parameters to their canonical form if it
614 hasn't been done before.
619 def Exec(self, feedback_fn):
620 """Execute the tasklet.
622 This method should implement the actual work. It should raise
623 errors.OpExecError for failures that are somewhat dealt with in code, or
627 raise NotImplementedError
630 def _GetWantedNodes(lu, nodes):
631 """Returns list of checked and expanded node names.
633 @type lu: L{LogicalUnit}
634 @param lu: the logical unit on whose behalf we execute
636 @param nodes: list of node names or None for all nodes
638 @return: the list of nodes, sorted
639 @raise errors.ProgrammerError: if the nodes parameter is wrong type
643 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
644 " non-empty list of nodes whose name is to be expanded.")
646 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
647 return utils.NiceSort(wanted)
650 def _GetWantedInstances(lu, instances):
651 """Returns list of checked and expanded instance names.
653 @type lu: L{LogicalUnit}
654 @param lu: the logical unit on whose behalf we execute
655 @type instances: list
656 @param instances: list of instance names or None for all instances
658 @return: the list of instances, sorted
659 @raise errors.OpPrereqError: if the instances parameter is wrong type
660 @raise errors.OpPrereqError: if any of the passed instances is not found
664 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
666 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
670 def _GetUpdatedParams(old_params, update_dict,
671 use_default=True, use_none=False):
672 """Return the new version of a parameter dictionary.
674 @type old_params: dict
675 @param old_params: old parameters
676 @type update_dict: dict
677 @param update_dict: dict containing new parameter values, or
678 constants.VALUE_DEFAULT to reset the parameter to its default
680 @type use_default: boolean
681 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
682 values as 'to be deleted' values
683 @type use_none: boolean
684 @param use_none: whether to recognise C{None} values as 'to be
687 @return: the new parameter dictionary
690 params_copy = copy.deepcopy(old_params)
691 for key, val in update_dict.iteritems():
692 if ((use_default and val == constants.VALUE_DEFAULT) or
693 (use_none and val is None)):
699 params_copy[key] = val
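# Illustrative sketch of _GetUpdatedParams (keys and values are made up): with
# old_params = {"mem": 128, "vcpus": 2} and
# update_dict = {"mem": constants.VALUE_DEFAULT, "vcpus": 4}, the result is
# {"vcpus": 4}: "mem" is dropped because VALUE_DEFAULT (with use_default=True)
# means "fall back to the default", while "vcpus" is simply overwritten.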
703 def _CheckOutputFields(static, dynamic, selected):
704 """Checks whether all selected fields are valid.
706 @type static: L{utils.FieldSet}
707 @param static: static fields set
708 @type dynamic: L{utils.FieldSet}
709 @param dynamic: dynamic fields set
716 delta = f.NonMatching(selected)
718 raise errors.OpPrereqError("Unknown output fields selected: %s"
719 % ",".join(delta), errors.ECODE_INVAL)
722 def _CheckGlobalHvParams(params):
723 """Validates that given hypervisor params are not global ones.
725 This will ensure that instances don't get customised versions of
729 used_globals = constants.HVC_GLOBALS.intersection(params)
731 msg = ("The following hypervisor parameters are global and cannot"
732 " be customized at instance level, please modify them at"
733 " cluster level: %s" % utils.CommaJoin(used_globals))
734 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
737 def _CheckNodeOnline(lu, node):
738 """Ensure that a given node is online.
740 @param lu: the LU on behalf of which we make the check
741 @param node: the node to check
742 @raise errors.OpPrereqError: if the node is offline
745 if lu.cfg.GetNodeInfo(node).offline:
746 raise errors.OpPrereqError("Can't use offline node %s" % node,
750 def _CheckNodeNotDrained(lu, node):
751 """Ensure that a given node is not drained.
753 @param lu: the LU on behalf of which we make the check
754 @param node: the node to check
755 @raise errors.OpPrereqError: if the node is drained
758 if lu.cfg.GetNodeInfo(node).drained:
759 raise errors.OpPrereqError("Can't use drained node %s" % node,
763 def _CheckNodeHasOS(lu, node, os_name, force_variant):
764 """Ensure that a node supports a given OS.
766 @param lu: the LU on behalf of which we make the check
767 @param node: the node to check
768 @param os_name: the OS to query about
769 @param force_variant: whether to ignore variant errors
770 @raise errors.OpPrereqError: if the node does not support the OS
773 result = lu.rpc.call_os_get(node, os_name)
774 result.Raise("OS '%s' not in supported OS list for node %s" %
776 prereq=True, ecode=errors.ECODE_INVAL)
777 if not force_variant:
778 _CheckOSVariant(result.payload, os_name)
781 def _RequireFileStorage():
782 """Checks that file storage is enabled.
784 @raise errors.OpPrereqError: when file storage is disabled
787 if not constants.ENABLE_FILE_STORAGE:
788 raise errors.OpPrereqError("File storage disabled at configure time",
792 def _CheckDiskTemplate(template):
793 """Ensure a given disk template is valid.
796 if template not in constants.DISK_TEMPLATES:
797 msg = ("Invalid disk template name '%s', valid templates are: %s" %
798 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
799 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
800 if template == constants.DT_FILE:
801 _RequireFileStorage()
805 def _CheckStorageType(storage_type):
806 """Ensure a given storage type is valid.
809 if storage_type not in constants.VALID_STORAGE_TYPES:
810 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
812 if storage_type == constants.ST_FILE:
813 _RequireFileStorage()
817 def _GetClusterDomainSecret():
818 """Reads the cluster domain secret.
821 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
825 def _CheckInstanceDown(lu, instance, reason):
826 """Ensure that an instance is not running."""
827 if instance.admin_up:
828 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
829 (instance.name, reason), errors.ECODE_STATE)
831 pnode = instance.primary_node
832 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
833 ins_l.Raise("Can't contact node %s for instance information" % pnode,
834 prereq=True, ecode=errors.ECODE_ENVIRON)
836 if instance.name in ins_l.payload:
837 raise errors.OpPrereqError("Instance %s is running, %s" %
838 (instance.name, reason), errors.ECODE_STATE)
841 def _ExpandItemName(fn, name, kind):
842 """Expand an item name.
844 @param fn: the function to use for expansion
845 @param name: requested item name
846 @param kind: text description ('Node' or 'Instance')
847 @return: the resolved (full) name
848 @raise errors.OpPrereqError: if the item is not found
852 if full_name is None:
853 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
858 def _ExpandNodeName(cfg, name):
859 """Wrapper over L{_ExpandItemName} for nodes."""
860 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
863 def _ExpandInstanceName(cfg, name):
864 """Wrapper over L{_ExpandItemName} for instance."""
865 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
868 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
869 memory, vcpus, nics, disk_template, disks,
870 bep, hvp, hypervisor_name):
871 """Builds instance related env variables for hooks
873 This builds the hook environment from individual variables.
876 @param name: the name of the instance
877 @type primary_node: string
878 @param primary_node: the name of the instance's primary node
879 @type secondary_nodes: list
880 @param secondary_nodes: list of secondary nodes as strings
881 @type os_type: string
882 @param os_type: the name of the instance's OS
883 @type status: boolean
884 @param status: the should_run status of the instance
886 @param memory: the memory size of the instance
888 @param vcpus: the count of VCPUs the instance has
890 @param nics: list of tuples (ip, mac, mode, link) representing
891 the NICs the instance has
892 @type disk_template: string
893 @param disk_template: the disk template of the instance
895 @param disks: the list of (size, mode) pairs
897 @param bep: the backend parameters for the instance
899 @param hvp: the hypervisor parameters for the instance
900 @type hypervisor_name: string
901 @param hypervisor_name: the hypervisor for the instance
903 @return: the hook environment for this instance
912 "INSTANCE_NAME": name,
913 "INSTANCE_PRIMARY": primary_node,
914 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
915 "INSTANCE_OS_TYPE": os_type,
916 "INSTANCE_STATUS": str_status,
917 "INSTANCE_MEMORY": memory,
918 "INSTANCE_VCPUS": vcpus,
919 "INSTANCE_DISK_TEMPLATE": disk_template,
920 "INSTANCE_HYPERVISOR": hypervisor_name,
924 nic_count = len(nics)
925 for idx, (ip, mac, mode, link) in enumerate(nics):
928 env["INSTANCE_NIC%d_IP" % idx] = ip
929 env["INSTANCE_NIC%d_MAC" % idx] = mac
930 env["INSTANCE_NIC%d_MODE" % idx] = mode
931 env["INSTANCE_NIC%d_LINK" % idx] = link
932 if mode == constants.NIC_MODE_BRIDGED:
933 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
937 env["INSTANCE_NIC_COUNT"] = nic_count
940 disk_count = len(disks)
941 for idx, (size, mode) in enumerate(disks):
942 env["INSTANCE_DISK%d_SIZE" % idx] = size
943 env["INSTANCE_DISK%d_MODE" % idx] = mode
947 env["INSTANCE_DISK_COUNT"] = disk_count
949 for source, kind in [(bep, "BE"), (hvp, "HV")]:
950 for key, value in source.items():
951 env["INSTANCE_%s_%s" % (kind, key)] = value
956 def _NICListToTuple(lu, nics):
957 """Build a list of nic information tuples.
959 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
960 value in LUQueryInstanceData.
962 @type lu: L{LogicalUnit}
963 @param lu: the logical unit on whose behalf we execute
964 @type nics: list of L{objects.NIC}
965 @param nics: list of nics to convert to hooks tuples
969 cluster = lu.cfg.GetClusterInfo()
973 filled_params = cluster.SimpleFillNIC(nic.nicparams)
974 mode = filled_params[constants.NIC_MODE]
975 link = filled_params[constants.NIC_LINK]
976 hooks_nics.append((ip, mac, mode, link))
980 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
981 """Builds instance related env variables for hooks from an object.
983 @type lu: L{LogicalUnit}
984 @param lu: the logical unit on whose behalf we execute
985 @type instance: L{objects.Instance}
986 @param instance: the instance for which we should build the
989 @param override: dictionary with key/values that will override
992 @return: the hook environment dictionary
995 cluster = lu.cfg.GetClusterInfo()
996 bep = cluster.FillBE(instance)
997 hvp = cluster.FillHV(instance)
999 'name': instance.name,
1000 'primary_node': instance.primary_node,
1001 'secondary_nodes': instance.secondary_nodes,
1002 'os_type': instance.os,
1003 'status': instance.admin_up,
1004 'memory': bep[constants.BE_MEMORY],
1005 'vcpus': bep[constants.BE_VCPUS],
1006 'nics': _NICListToTuple(lu, instance.nics),
1007 'disk_template': instance.disk_template,
1008 'disks': [(disk.size, disk.mode) for disk in instance.disks],
1011 'hypervisor_name': instance.hypervisor,
1014 args.update(override)
1015 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1018 def _AdjustCandidatePool(lu, exceptions):
1019 """Adjust the candidate pool after node operations.
1022 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1024 lu.LogInfo("Promoted nodes to master candidate role: %s",
1025 utils.CommaJoin(node.name for node in mod_list))
1026 for name in mod_list:
1027 lu.context.ReaddNode(name)
1028 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1030 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1034 def _DecideSelfPromotion(lu, exceptions=None):
1035 """Decide whether I should promote myself as a master candidate.
1038 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1039 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1040 # the new node will increase mc_max by one, so:
1041 mc_should = min(mc_should + 1, cp_size)
1042 return mc_now < mc_should
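# Illustrative sketch (numbers are made up): with candidate_pool_size = 10,
# three current master candidates and GetMasterCandidateStats() reporting
# three desired, adding this node bumps the desired count to min(3 + 1, 10)
# = 4, so 3 < 4 holds and the new node promotes itself to master candidate.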
1045 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1046 """Check that the brigdes needed by a list of nics exist.
1049 cluster = lu.cfg.GetClusterInfo()
1050 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1051 brlist = [params[constants.NIC_LINK] for params in paramslist
1052 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1054 result = lu.rpc.call_bridges_exist(target_node, brlist)
1055 result.Raise("Error checking bridges on destination node '%s'" %
1056 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1059 def _CheckInstanceBridgesExist(lu, instance, node=None):
1060 """Check that the brigdes needed by an instance exist.
1064 node = instance.primary_node
1065 _CheckNicsBridgesExist(lu, instance.nics, node)
1068 def _CheckOSVariant(os_obj, name):
1069 """Check whether an OS name conforms to the os variants specification.
1071 @type os_obj: L{objects.OS}
1072 @param os_obj: OS object to check
1074 @param name: OS name passed by the user, to check for validity
1077 if not os_obj.supported_variants:
1080 variant = name.split("+", 1)[1]
1082 raise errors.OpPrereqError("OS name must include a variant",
1085 if variant not in os_obj.supported_variants:
1086 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1089 def _GetNodeInstancesInner(cfg, fn):
1090 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1093 def _GetNodeInstances(cfg, node_name):
1094 """Returns a list of all primary and secondary instances on a node.
1098 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1101 def _GetNodePrimaryInstances(cfg, node_name):
1102 """Returns primary instances on a node.
1105 return _GetNodeInstancesInner(cfg,
1106 lambda inst: node_name == inst.primary_node)
1109 def _GetNodeSecondaryInstances(cfg, node_name):
1110 """Returns secondary instances on a node.
1113 return _GetNodeInstancesInner(cfg,
1114 lambda inst: node_name in inst.secondary_nodes)
1117 def _GetStorageTypeArgs(cfg, storage_type):
1118 """Returns the arguments for a storage type.
1121 # Special case for file storage
1122 if storage_type == constants.ST_FILE:
1123 # storage.FileStorage wants a list of storage directories
1124 return [[cfg.GetFileStorageDir()]]
1129 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1132 for dev in instance.disks:
1133 cfg.SetDiskID(dev, node_name)
1135 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1136 result.Raise("Failed to get disk status from node %s" % node_name,
1137 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1139 for idx, bdev_status in enumerate(result.payload):
1140 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1146 class LUPostInitCluster(LogicalUnit):
1147 """Logical unit for running hooks after cluster initialization.
1150 HPATH = "cluster-init"
1151 HTYPE = constants.HTYPE_CLUSTER
1153 def BuildHooksEnv(self):
1157 env = {"OP_TARGET": self.cfg.GetClusterName()}
1158 mn = self.cfg.GetMasterNode()
1159 return env, [], [mn]
1161 def Exec(self, feedback_fn):
1168 class LUDestroyCluster(LogicalUnit):
1169 """Logical unit for destroying the cluster.
1172 HPATH = "cluster-destroy"
1173 HTYPE = constants.HTYPE_CLUSTER
1175 def BuildHooksEnv(self):
1179 env = {"OP_TARGET": self.cfg.GetClusterName()}
1182 def CheckPrereq(self):
1183 """Check prerequisites.
1185 This checks whether the cluster is empty.
1187 Any errors are signaled by raising errors.OpPrereqError.
1190 master = self.cfg.GetMasterNode()
1192 nodelist = self.cfg.GetNodeList()
1193 if len(nodelist) != 1 or nodelist[0] != master:
1194 raise errors.OpPrereqError("There are still %d node(s) in"
1195 " this cluster." % (len(nodelist) - 1),
1197 instancelist = self.cfg.GetInstanceList()
1199 raise errors.OpPrereqError("There are still %d instance(s) in"
1200 " this cluster." % len(instancelist),
1203 def Exec(self, feedback_fn):
1204 """Destroys the cluster.
1207 master = self.cfg.GetMasterNode()
1208 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1210 # Run post hooks on master node before it's removed
1211 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1213 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1215 # pylint: disable-msg=W0702
1216 self.LogWarning("Errors occurred running hooks on %s" % master)
1218 result = self.rpc.call_node_stop_master(master, False)
1219 result.Raise("Could not disable the master role")
1221 if modify_ssh_setup:
1222 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1223 utils.CreateBackup(priv_key)
1224 utils.CreateBackup(pub_key)
1229 def _VerifyCertificate(filename):
1230 """Verifies a certificate for LUVerifyCluster.
1232 @type filename: string
1233 @param filename: Path to PEM file
1237 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1238 utils.ReadFile(filename))
1239 except Exception, err: # pylint: disable-msg=W0703
1240 return (LUVerifyCluster.ETYPE_ERROR,
1241 "Failed to load X509 certificate %s: %s" % (filename, err))
1244 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1245 constants.SSL_CERT_EXPIRATION_ERROR)
1248 fnamemsg = "While verifying %s: %s" % (filename, msg)
1253 return (None, fnamemsg)
1254 elif errcode == utils.CERT_WARNING:
1255 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1256 elif errcode == utils.CERT_ERROR:
1257 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1259 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1262 class LUVerifyCluster(LogicalUnit):
1263 """Verifies the cluster status.
1266 HPATH = "cluster-verify"
1267 HTYPE = constants.HTYPE_CLUSTER
1269 ("skip_checks", _EmptyList,
1270 _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1271 ("verbose", False, _TBool),
1272 ("error_codes", False, _TBool),
1273 ("debug_simulate_errors", False, _TBool),
1277 TCLUSTER = "cluster"
1279 TINSTANCE = "instance"
1281 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1282 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1283 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1284 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1285 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1286 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1288 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1289 ENODEDRBD = (TNODE, "ENODEDRBD")
1290 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1291 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1292 ENODEHV = (TNODE, "ENODEHV")
1293 ENODELVM = (TNODE, "ENODELVM")
1294 ENODEN1 = (TNODE, "ENODEN1")
1295 ENODENET = (TNODE, "ENODENET")
1296 ENODEOS = (TNODE, "ENODEOS")
1297 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1298 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1299 ENODERPC = (TNODE, "ENODERPC")
1300 ENODESSH = (TNODE, "ENODESSH")
1301 ENODEVERSION = (TNODE, "ENODEVERSION")
1302 ENODESETUP = (TNODE, "ENODESETUP")
1303 ENODETIME = (TNODE, "ENODETIME")
1305 ETYPE_FIELD = "code"
1306 ETYPE_ERROR = "ERROR"
1307 ETYPE_WARNING = "WARNING"
1309 class NodeImage(object):
1310 """A class representing the logical and physical status of a node.
1313 @ivar name: the node name to which this object refers
1314 @ivar volumes: a structure as returned from
1315 L{ganeti.backend.GetVolumeList} (runtime)
1316 @ivar instances: a list of running instances (runtime)
1317 @ivar pinst: list of configured primary instances (config)
1318 @ivar sinst: list of configured secondary instances (config)
1319 @ivar sbp: dict of {primary-node: list of instances} for all instances
1320 for which this node is secondary (config)
1321 @ivar mfree: free memory, as reported by hypervisor (runtime)
1322 @ivar dfree: free disk, as reported by the node (runtime)
1323 @ivar offline: the offline status (config)
1324 @type rpc_fail: boolean
1325 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1326 not whether the individual keys were correct) (runtime)
1327 @type lvm_fail: boolean
1328 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1329 @type hyp_fail: boolean
1330 @ivar hyp_fail: whether the RPC call didn't return the instance list
1331 @type ghost: boolean
1332 @ivar ghost: whether this is a known node or not (config)
1333 @type os_fail: boolean
1334 @ivar os_fail: whether the RPC call didn't return valid OS data
1336 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1339 def __init__(self, offline=False, name=None):
1348 self.offline = offline
1349 self.rpc_fail = False
1350 self.lvm_fail = False
1351 self.hyp_fail = False
1353 self.os_fail = False
1356 def ExpandNames(self):
1357 self.needed_locks = {
1358 locking.LEVEL_NODE: locking.ALL_SET,
1359 locking.LEVEL_INSTANCE: locking.ALL_SET,
1361 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1363 def _Error(self, ecode, item, msg, *args, **kwargs):
1364 """Format an error message.
1366 Based on the opcode's error_codes parameter, either format a
1367 parseable error code, or a simpler error string.
1369 This must be called only from Exec and functions called from Exec.
1372 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1374 # first complete the msg
1377 # then format the whole message
1378 if self.op.error_codes:
1379 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1385 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1386 # and finally report it via the feedback_fn
1387 self._feedback_fn(" - %s" % msg)
1389 def _ErrorIf(self, cond, *args, **kwargs):
1390 """Log an error message if the passed condition is True.
1393 cond = bool(cond) or self.op.debug_simulate_errors
1395 self._Error(*args, **kwargs)
1396 # do not mark the operation as failed for WARN cases only
1397 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1398 self.bad = self.bad or cond
1400 def _VerifyNode(self, ninfo, nresult):
1401 """Run multiple tests against a node.
1405 - compares ganeti version
1406 - checks vg existence and size > 20G
1407 - checks config file checksum
1408 - checks ssh to other nodes
1410 @type ninfo: L{objects.Node}
1411 @param ninfo: the node to check
1412 @param nresult: the results from the node
1414 @return: whether overall this call was successful (and we can expect
1415 reasonable values in the response)
1419 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1421 # main result, nresult should be a non-empty dict
1422 test = not nresult or not isinstance(nresult, dict)
1423 _ErrorIf(test, self.ENODERPC, node,
1424 "unable to verify node: no data returned")
1428 # compares ganeti version
1429 local_version = constants.PROTOCOL_VERSION
1430 remote_version = nresult.get("version", None)
1431 test = not (remote_version and
1432 isinstance(remote_version, (list, tuple)) and
1433 len(remote_version) == 2)
1434 _ErrorIf(test, self.ENODERPC, node,
1435 "connection to node returned invalid data")
1439 test = local_version != remote_version[0]
1440 _ErrorIf(test, self.ENODEVERSION, node,
1441 "incompatible protocol versions: master %s,"
1442 " node %s", local_version, remote_version[0])
1446 # node seems compatible, we can actually try to look into its results
1448 # full package version
1449 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1450 self.ENODEVERSION, node,
1451 "software version mismatch: master %s, node %s",
1452 constants.RELEASE_VERSION, remote_version[1],
1453 code=self.ETYPE_WARNING)
1455 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1456 if isinstance(hyp_result, dict):
1457 for hv_name, hv_result in hyp_result.iteritems():
1458 test = hv_result is not None
1459 _ErrorIf(test, self.ENODEHV, node,
1460 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1463 test = nresult.get(constants.NV_NODESETUP,
1464 ["Missing NODESETUP results"])
1465 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1470 def _VerifyNodeTime(self, ninfo, nresult,
1471 nvinfo_starttime, nvinfo_endtime):
1472 """Check the node time.
1474 @type ninfo: L{objects.Node}
1475 @param ninfo: the node to check
1476 @param nresult: the remote results for the node
1477 @param nvinfo_starttime: the start time of the RPC call
1478 @param nvinfo_endtime: the end time of the RPC call
1482 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1484 ntime = nresult.get(constants.NV_TIME, None)
1486 ntime_merged = utils.MergeTime(ntime)
1487 except (ValueError, TypeError):
1488 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1491 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1492 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1493 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1494 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1498 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1499 "Node time diverges by at least %s from master node time",
1502 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1503 """Check the node time.
1505 @type ninfo: L{objects.Node}
1506 @param ninfo: the node to check
1507 @param nresult: the remote results for the node
1508 @param vg_name: the configured VG name
1515 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1517 # checks vg existence and size > 20G
1518 vglist = nresult.get(constants.NV_VGLIST, None)
1520 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1522 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1523 constants.MIN_VG_SIZE)
1524 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1527 pvlist = nresult.get(constants.NV_PVLIST, None)
1528 test = pvlist is None
1529 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1531 # check that ':' is not present in PV names, since it's a
1532 # special character for lvcreate (denotes the range of PEs to
1534 for _, pvname, owner_vg in pvlist:
1535 test = ":" in pvname
1536 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1537 " '%s' of VG '%s'", pvname, owner_vg)
1539 def _VerifyNodeNetwork(self, ninfo, nresult):
1540 """Check the node time.
1542 @type ninfo: L{objects.Node}
1543 @param ninfo: the node to check
1544 @param nresult: the remote results for the node
1548 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1550 test = constants.NV_NODELIST not in nresult
1551 _ErrorIf(test, self.ENODESSH, node,
1552 "node hasn't returned node ssh connectivity data")
1554 if nresult[constants.NV_NODELIST]:
1555 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1556 _ErrorIf(True, self.ENODESSH, node,
1557 "ssh communication with node '%s': %s", a_node, a_msg)
1559 test = constants.NV_NODENETTEST not in nresult
1560 _ErrorIf(test, self.ENODENET, node,
1561 "node hasn't returned node tcp connectivity data")
1563 if nresult[constants.NV_NODENETTEST]:
1564 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1566 _ErrorIf(True, self.ENODENET, node,
1567 "tcp communication with node '%s': %s",
1568 anode, nresult[constants.NV_NODENETTEST][anode])
1570 test = constants.NV_MASTERIP not in nresult
1571 _ErrorIf(test, self.ENODENET, node,
1572 "node hasn't returned node master IP reachability data")
1574 if not nresult[constants.NV_MASTERIP]:
1575 if node == self.master_node:
1576 msg = "the master node cannot reach the master IP (not configured?)"
1578 msg = "cannot reach the master IP"
1579 _ErrorIf(True, self.ENODENET, node, msg)
1582 def _VerifyInstance(self, instance, instanceconfig, node_image):
1583 """Verify an instance.
1585 This function checks to see if the required block devices are
1586 available on the instance's node.
1589 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1590 node_current = instanceconfig.primary_node
1592 node_vol_should = {}
1593 instanceconfig.MapLVsByNode(node_vol_should)
1595 for node in node_vol_should:
1596 n_img = node_image[node]
1597 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1598 # ignore missing volumes on offline or broken nodes
1600 for volume in node_vol_should[node]:
1601 test = volume not in n_img.volumes
1602 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1603 "volume %s missing on node %s", volume, node)
1605 if instanceconfig.admin_up:
1606 pri_img = node_image[node_current]
1607 test = instance not in pri_img.instances and not pri_img.offline
1608 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1609 "instance not running on its primary node %s",
1612 for node, n_img in node_image.items():
1613 if node != node_current:
1614 test = instance in n_img.instances
1615 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1616 "instance should not run on node %s", node)
1618 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1619 """Verify if there are any unknown volumes in the cluster.
1621 The .os, .swap and backup volumes are ignored. All other volumes are
1622 reported as unknown.
1625 for node, n_img in node_image.items():
1626 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1627 # skip non-healthy nodes
1629 for volume in n_img.volumes:
1630 test = (node not in node_vol_should or
1631 volume not in node_vol_should[node])
1632 self._ErrorIf(test, self.ENODEORPHANLV, node,
1633 "volume %s is unknown", volume)
1635 def _VerifyOrphanInstances(self, instancelist, node_image):
1636 """Verify the list of running instances.
1638 This checks what instances are running but unknown to the cluster.
1641 for node, n_img in node_image.items():
1642 for o_inst in n_img.instances:
1643 test = o_inst not in instancelist
1644 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1645 "instance %s on node %s should not exist", o_inst, node)
1647 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1648 """Verify N+1 Memory Resilience.
1650 Check that if one single node dies we can still start all the
1651 instances it was primary for.
1654 for node, n_img in node_image.items():
1655 # This code checks that every node which is now listed as
1656 # secondary has enough memory to host all instances it is
1657 # supposed to should a single other node in the cluster fail.
1658 # FIXME: not ready for failover to an arbitrary node
1659 # FIXME: does not support file-backed instances
1660 # WARNING: we currently take into account down instances as well
1661 # as up ones, considering that even if they're down someone
1662 # might want to start them even in the event of a node failure.
1663 for prinode, instances in n_img.sbp.items():
1665 for instance in instances:
1666 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1667 if bep[constants.BE_AUTO_BALANCE]:
1668 needed_mem += bep[constants.BE_MEMORY]
1669 test = n_img.mfree < needed_mem
1670 self._ErrorIf(test, self.ENODEN1, node,
1671 "not enough memory on to accommodate"
1672 " failovers should peer node %s fail", prinode)
1674 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1676 """Verifies and computes the node required file checksums.
1678 @type ninfo: L{objects.Node}
1679 @param ninfo: the node to check
1680 @param nresult: the remote results for the node
1681 @param file_list: required list of files
1682 @param local_cksum: dictionary of local files and their checksums
1683 @param master_files: list of files that only masters should have
1687 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1689 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1690 test = not isinstance(remote_cksum, dict)
1691 _ErrorIf(test, self.ENODEFILECHECK, node,
1692 "node hasn't returned file checksum data")
1696 for file_name in file_list:
1697 node_is_mc = ninfo.master_candidate
1698 must_have = (file_name not in master_files) or node_is_mc
1700 test1 = file_name not in remote_cksum
1702 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1704 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1705 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1706 "file '%s' missing", file_name)
1707 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1708 "file '%s' has wrong checksum", file_name)
1709 # not candidate and this is not a must-have file
1710 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1711 "file '%s' should not exist on non master"
1712 " candidates (and the file is outdated)", file_name)
1713 # all good, except non-master/non-must have combination
1714 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1715 "file '%s' should not exist"
1716 " on non master candidates", file_name)
1718 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1719 """Verifies and the node DRBD status.
1721 @type ninfo: L{objects.Node}
1722 @param ninfo: the node to check
1723 @param nresult: the remote results for the node
1724 @param instanceinfo: the dict of instances
1725 @param drbd_map: the DRBD map as returned by
1726 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1730 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1732 # compute the DRBD minors
1734 for minor, instance in drbd_map[node].items():
1735 test = instance not in instanceinfo
1736 _ErrorIf(test, self.ECLUSTERCFG, None,
1737 "ghost instance '%s' in temporary DRBD map", instance)
1738 # ghost instance should not be running, but otherwise we
1739 # don't give double warnings (both ghost instance and
1740 # unallocated minor in use)
1742 node_drbd[minor] = (instance, False)
1744 instance = instanceinfo[instance]
1745 node_drbd[minor] = (instance.name, instance.admin_up)
1747 # and now check them
1748 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1749 test = not isinstance(used_minors, (tuple, list))
1750 _ErrorIf(test, self.ENODEDRBD, node,
1751 "cannot parse drbd status file: %s", str(used_minors))
1753 # we cannot check drbd status
1756 for minor, (iname, must_exist) in node_drbd.items():
1757 test = minor not in used_minors and must_exist
1758 _ErrorIf(test, self.ENODEDRBD, node,
1759 "drbd minor %d of instance %s is not active", minor, iname)
1760 for minor in used_minors:
1761 test = minor not in node_drbd
1762 _ErrorIf(test, self.ENODEDRBD, node,
1763 "unallocated drbd minor %d is in use", minor)
1765 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1766 """Builds the node OS structures.
1768 @type ninfo: L{objects.Node}
1769 @param ninfo: the node to check
1770 @param nresult: the remote results for the node
1771 @param nimg: the node image object
1775 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1777 remote_os = nresult.get(constants.NV_OSLIST, None)
1778 test = (not isinstance(remote_os, list) or
1779 not compat.all(isinstance(v, list) and len(v) == 7
1780 for v in remote_os))
1782 _ErrorIf(test, self.ENODEOS, node,
1783 "node hasn't returned valid OS data")
1792 for (name, os_path, status, diagnose,
1793 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1795 if name not in os_dict:
1798 # parameters is a list of lists instead of list of tuples due to
1799 # JSON lacking a real tuple type, fix it:
1800 parameters = [tuple(v) for v in parameters]
1801 os_dict[name].append((os_path, status, diagnose,
1802 set(variants), set(parameters), set(api_ver)))
1804 nimg.oslist = os_dict
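# Illustrative sketch (the OS name and path are made up): after _UpdateNodeOS,
# nimg.oslist maps each OS name to a list of findings, one per path where the
# OS was found, e.g.:
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([10, 15]))]}
# i.e. (path, status, diagnose, variants, parameters, api_versions); more than
# one entry for a name means a shadowed OS, which _VerifyNodeOS reports.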
1806 def _VerifyNodeOS(self, ninfo, nimg, base):
1807 """Verifies the node OS list.
1809 @type ninfo: L{objects.Node}
1810 @param ninfo: the node to check
1811 @param nimg: the node image object
1812 @param base: the 'template' node we match against (e.g. from the master)
1816 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1818 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1820 for os_name, os_data in nimg.oslist.items():
1821 assert os_data, "Empty OS status for OS %s?!" % os_name
1822 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1823 _ErrorIf(not f_status, self.ENODEOS, node,
1824 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1825 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1826 "OS '%s' has multiple entries (first one shadows the rest): %s",
1827 os_name, utils.CommaJoin([v[0] for v in os_data]))
1828 # this will be caught in backend too
1829 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1830 and not f_var, self.ENODEOS, node,
1831 "OS %s with API at least %d does not declare any variant",
1832 os_name, constants.OS_API_V15)
1833 # comparisons with the 'base' image
1834 test = os_name not in base.oslist
1835 _ErrorIf(test, self.ENODEOS, node,
1836 "Extra OS %s not present on reference node (%s)",
1840 assert base.oslist[os_name], "Base node has empty OS status?"
1841 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1843 # base OS is invalid, skipping
1845 for kind, a, b in [("API version", f_api, b_api),
1846 ("variants list", f_var, b_var),
1847 ("parameters", f_param, b_param)]:
1848 _ErrorIf(a != b, self.ENODEOS, node,
1849 "OS %s %s differs from reference node %s: %s vs. %s",
1850 kind, os_name, base.name,
1851 utils.CommaJoin(a), utils.CommaJoin(b))
1853 # check any missing OSes
1854 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1855 _ErrorIf(missing, self.ENODEOS, node,
1856 "OSes present on reference node %s but missing on this node: %s",
1857 base.name, utils.CommaJoin(missing))
1859 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1860 """Verifies and updates the node volume data.
1862 This function will update a L{NodeImage}'s internal structures
1863 with data from the remote call.
1865 @type ninfo: L{objects.Node}
1866 @param ninfo: the node to check
1867 @param nresult: the remote results for the node
1868 @param nimg: the node image object
1869 @param vg_name: the configured VG name
1873 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1875 nimg.lvm_fail = True
1876 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1879 elif isinstance(lvdata, basestring):
1880 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1881 utils.SafeEncode(lvdata))
1882 elif not isinstance(lvdata, dict):
1883 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1885 nimg.volumes = lvdata
1886 nimg.lvm_fail = False
1888 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1889 """Verifies and updates the node instance list.
1891 If the listing was successful, then updates this node's instance
1892 list. Otherwise, it marks the RPC call as failed for the instance
1895 @type ninfo: L{objects.Node}
1896 @param ninfo: the node to check
1897 @param nresult: the remote results for the node
1898 @param nimg: the node image object
1901 idata = nresult.get(constants.NV_INSTANCELIST, None)
1902 test = not isinstance(idata, list)
1903 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1904 " (instancelist): %s", utils.SafeEncode(str(idata)))
1906 nimg.hyp_fail = True
1908 nimg.instances = idata
1910 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1911 """Verifies and computes a node information map
1913 @type ninfo: L{objects.Node}
1914 @param ninfo: the node to check
1915 @param nresult: the remote results for the node
1916 @param nimg: the node image object
1917 @param vg_name: the configured VG name
1921 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1923 # try to read free memory (from the hypervisor)
1924 hv_info = nresult.get(constants.NV_HVINFO, None)
1925 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1926 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1929 nimg.mfree = int(hv_info["memory_free"])
1930 except (ValueError, TypeError):
1931 _ErrorIf(True, self.ENODERPC, node,
1932 "node returned invalid nodeinfo, check hypervisor")
1934 # FIXME: devise a free space model for file based instances as well
1935 if vg_name is not None:
1936 test = (constants.NV_VGLIST not in nresult or
1937 vg_name not in nresult[constants.NV_VGLIST])
1938 _ErrorIf(test, self.ENODELVM, node,
1939 "node didn't return data for the volume group '%s'"
1940 " - it is either missing or broken", vg_name)
1943 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1944 except (ValueError, TypeError):
1945 _ErrorIf(True, self.ENODERPC, node,
1946 "node returned invalid LVM info, check LVM status")
1948 def BuildHooksEnv(self):
1951 Cluster-Verify hooks are run only in the post phase; if they fail, their
1952 output is logged in the verify output and the verification fails.
1955 all_nodes = self.cfg.GetNodeList()
1957 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1959 for node in self.cfg.GetAllNodesInfo().values():
1960 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1962 return env, [], all_nodes
1964 def Exec(self, feedback_fn):
1965 """Verify integrity of cluster, performing various test on nodes.
1969 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1970 verbose = self.op.verbose
1971 self._feedback_fn = feedback_fn
1972 feedback_fn("* Verifying global settings")
1973 for msg in self.cfg.VerifyConfig():
1974 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1976 # Check the cluster certificates
1977 for cert_filename in constants.ALL_CERT_FILES:
1978 (errcode, msg) = _VerifyCertificate(cert_filename)
1979 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1981 vg_name = self.cfg.GetVGName()
1982 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1983 cluster = self.cfg.GetClusterInfo()
1984 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1985 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1986 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1987 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1988 for iname in instancelist)
1989 i_non_redundant = [] # Non redundant instances
1990 i_non_a_balanced = [] # Non auto-balanced instances
1991 n_offline = 0 # Count of offline nodes
1992 n_drained = 0 # Count of nodes being drained
1993 node_vol_should = {}
1995 # FIXME: verify OS list
1996 # do local checksums
1997 master_files = [constants.CLUSTER_CONF_FILE]
1998 master_node = self.master_node = self.cfg.GetMasterNode()
1999 master_ip = self.cfg.GetMasterIP()
2001 file_names = ssconf.SimpleStore().GetFileList()
2002 file_names.extend(constants.ALL_CERT_FILES)
2003 file_names.extend(master_files)
2004 if cluster.modify_etc_hosts:
2005 file_names.append(constants.ETC_HOSTS)
2007 local_checksums = utils.FingerprintFiles(file_names)
2009 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2010 node_verify_param = {
2011 constants.NV_FILELIST: file_names,
2012 constants.NV_NODELIST: [node.name for node in nodeinfo
2013 if not node.offline],
2014 constants.NV_HYPERVISOR: hypervisors,
2015 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2016 node.secondary_ip) for node in nodeinfo
2017 if not node.offline],
2018 constants.NV_INSTANCELIST: hypervisors,
2019 constants.NV_VERSION: None,
2020 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2021 constants.NV_NODESETUP: None,
2022 constants.NV_TIME: None,
2023 constants.NV_MASTERIP: (master_node, master_ip),
2024 constants.NV_OSLIST: None,
2027 if vg_name is not None:
2028 node_verify_param[constants.NV_VGLIST] = None
2029 node_verify_param[constants.NV_LVLIST] = vg_name
2030 node_verify_param[constants.NV_PVLIST] = [vg_name]
2031 node_verify_param[constants.NV_DRBDLIST] = None
2033 # Build our expected cluster state
2034 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2036 for node in nodeinfo)
2038 for instance in instancelist:
2039 inst_config = instanceinfo[instance]
2041 for nname in inst_config.all_nodes:
2042 if nname not in node_image:
2044 gnode = self.NodeImage(name=nname)
2046 node_image[nname] = gnode
2048 inst_config.MapLVsByNode(node_vol_should)
2050 pnode = inst_config.primary_node
2051 node_image[pnode].pinst.append(instance)
2053 for snode in inst_config.secondary_nodes:
2054 nimg = node_image[snode]
2055 nimg.sinst.append(instance)
2056 if pnode not in nimg.sbp:
2057 nimg.sbp[pnode] = []
2058 nimg.sbp[pnode].append(instance)
2060 # At this point, we have the in-memory data structures complete,
2061 # except for the runtime information, which we'll gather next
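# Illustrative shape of the structures built above (hypothetical names):
#   node_image["node1"].pinst == ["inst1"]            # instances with node1 as primary
#   node_image["node2"].sinst == ["inst1"]            # instances with node2 as secondary
#   node_image["node2"].sbp  == {"node1": ["inst1"]}  # secondaries grouped by primary node
# node_vol_should maps each node name to the list of LVs expected there.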
2063 # Due to the way our RPC system works, exact response times cannot be
2064 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2065 # time before and after executing the request, we can at least have a time
2067 nvinfo_starttime = time.time()
2068 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2069 self.cfg.GetClusterName())
2070 nvinfo_endtime = time.time()
2072 all_drbd_map = self.cfg.ComputeDRBDMap()
2074 feedback_fn("* Verifying node status")
2078 for node_i in nodeinfo:
2080 nimg = node_image[node]
2084 feedback_fn("* Skipping offline node %s" % (node,))
2088 if node == master_node:
2090 elif node_i.master_candidate:
2091 ntype = "master candidate"
2092 elif node_i.drained:
2098 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2100 msg = all_nvinfo[node].fail_msg
2101 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2103 nimg.rpc_fail = True
2106 nresult = all_nvinfo[node].payload
2108 nimg.call_ok = self._VerifyNode(node_i, nresult)
2109 self._VerifyNodeNetwork(node_i, nresult)
2110 self._VerifyNodeLVM(node_i, nresult, vg_name)
2111 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2113 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2114 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2116 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2117 self._UpdateNodeInstances(node_i, nresult, nimg)
2118 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2119 self._UpdateNodeOS(node_i, nresult, nimg)
2120 if not nimg.os_fail:
2121 if refos_img is None:
2123 self._VerifyNodeOS(node_i, nimg, refos_img)
2125 feedback_fn("* Verifying instance status")
2126 for instance in instancelist:
2128 feedback_fn("* Verifying instance %s" % instance)
2129 inst_config = instanceinfo[instance]
2130 self._VerifyInstance(instance, inst_config, node_image)
2131 inst_nodes_offline = []
2133 pnode = inst_config.primary_node
2134 pnode_img = node_image[pnode]
2135 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2136 self.ENODERPC, pnode, "instance %s, connection to"
2137 " primary node failed", instance)
2139 if pnode_img.offline:
2140 inst_nodes_offline.append(pnode)
2142 # If the instance is non-redundant we cannot survive losing its primary
2143 # node, so we are not N+1 compliant. On the other hand we have no disk
2144 # templates with more than one secondary so that situation is not well
2146 # FIXME: does not support file-backed instances
2147 if not inst_config.secondary_nodes:
2148 i_non_redundant.append(instance)
2149 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2150 instance, "instance has multiple secondary nodes: %s",
2151 utils.CommaJoin(inst_config.secondary_nodes),
2152 code=self.ETYPE_WARNING)
2154 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2155 i_non_a_balanced.append(instance)
2157 for snode in inst_config.secondary_nodes:
2158 s_img = node_image[snode]
2159 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2160 "instance %s, connection to secondary node failed", instance)
2163 inst_nodes_offline.append(snode)
2165 # warn that the instance lives on offline nodes
2166 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2167 "instance lives on offline node(s) %s",
2168 utils.CommaJoin(inst_nodes_offline))
2169 # ... or ghost nodes
2170 for node in inst_config.all_nodes:
2171 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2172 "instance lives on ghost node %s", node)
2174 feedback_fn("* Verifying orphan volumes")
2175 self._VerifyOrphanVolumes(node_vol_should, node_image)
2177 feedback_fn("* Verifying orphan instances")
2178 self._VerifyOrphanInstances(instancelist, node_image)
2180 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2181 feedback_fn("* Verifying N+1 Memory redundancy")
2182 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2184 feedback_fn("* Other Notes")
2186 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2187 % len(i_non_redundant))
2189 if i_non_a_balanced:
2190 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2191 % len(i_non_a_balanced))
2194 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2197 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2201 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2202 """Analyze the post-hooks' result
2204 This method analyses the hook result, handles it, and sends some
2205 nicely-formatted feedback back to the user.
2207 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2208 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2209 @param hooks_results: the results of the multi-node hooks rpc call
2210 @param feedback_fn: function used to send feedback back to the caller
2211 @param lu_result: previous Exec result
2212 @return: the new Exec result, based on the previous result
2216 # We only really run POST phase hooks, and are only interested in
2218 if phase == constants.HOOKS_PHASE_POST:
2219 # Used to change hooks' output to proper indentation
2220 indent_re = re.compile('^', re.M)
2221 feedback_fn("* Hooks Results")
2222 assert hooks_results, "invalid result from hooks"
2224 for node_name in hooks_results:
2225 res = hooks_results[node_name]
2227 test = msg and not res.offline
2228 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2229 "Communication failure in hooks execution: %s", msg)
2230 if res.offline or msg:
2231 # No need to investigate payload if node is offline or gave an error.
2232 # override manually lu_result here as _ErrorIf only
2233 # overrides self.bad
2236 for script, hkr, output in res.payload:
2237 test = hkr == constants.HKR_FAIL
2238 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2239 "Script %s failed, output:", script)
2241 output = indent_re.sub(' ', output)
2242 feedback_fn("%s" % output)
2248 class LUVerifyDisks(NoHooksLU):
2249 """Verifies the cluster disks status.
2254 def ExpandNames(self):
2255 self.needed_locks = {
2256 locking.LEVEL_NODE: locking.ALL_SET,
2257 locking.LEVEL_INSTANCE: locking.ALL_SET,
2259 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2261 def Exec(self, feedback_fn):
2262 """Verify integrity of cluster disks.
2264 @rtype: tuple of three items
2265 @return: a tuple of (dict of node-to-node_error, list of instances
2266 which need activate-disks, dict of instance: (node, volume) for
2270 result = res_nodes, res_instances, res_missing = {}, [], {}
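# For illustration (hypothetical names), the result may end up as:
#   res_nodes:     {"node1.example.com": "rpc error message"}
#   res_instances: ["instance1.example.com"]
#   res_missing:   {"instance2.example.com": [("node1.example.com", "xenvg/lv0")]}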
2272 vg_name = self.cfg.GetVGName()
2273 nodes = utils.NiceSort(self.cfg.GetNodeList())
2274 instances = [self.cfg.GetInstanceInfo(name)
2275 for name in self.cfg.GetInstanceList()]
2278 for inst in instances:
2280 if (not inst.admin_up or
2281 inst.disk_template not in constants.DTS_NET_MIRROR):
2283 inst.MapLVsByNode(inst_lvs)
2284 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2285 for node, vol_list in inst_lvs.iteritems():
2286 for vol in vol_list:
2287 nv_dict[(node, vol)] = inst
2292 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2296 node_res = node_lvs[node]
2297 if node_res.offline:
2299 msg = node_res.fail_msg
2301 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2302 res_nodes[node] = msg
2305 lvs = node_res.payload
2306 for lv_name, (_, _, lv_online) in lvs.items():
2307 inst = nv_dict.pop((node, lv_name), None)
2308 if (not lv_online and inst is not None
2309 and inst.name not in res_instances):
2310 res_instances.append(inst.name)
2312 # any leftover items in nv_dict are missing LVs, let's arrange the
2314 for key, inst in nv_dict.iteritems():
2315 if inst.name not in res_missing:
2316 res_missing[inst.name] = []
2317 res_missing[inst.name].append(key)
2322 class LURepairDiskSizes(NoHooksLU):
2323 """Verifies the cluster disks sizes.
2326 _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))]
2329 def ExpandNames(self):
2330 if self.op.instances:
2331 self.wanted_names = []
2332 for name in self.op.instances:
2333 full_name = _ExpandInstanceName(self.cfg, name)
2334 self.wanted_names.append(full_name)
2335 self.needed_locks = {
2336 locking.LEVEL_NODE: [],
2337 locking.LEVEL_INSTANCE: self.wanted_names,
2339 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2341 self.wanted_names = None
2342 self.needed_locks = {
2343 locking.LEVEL_NODE: locking.ALL_SET,
2344 locking.LEVEL_INSTANCE: locking.ALL_SET,
2346 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2348 def DeclareLocks(self, level):
2349 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2350 self._LockInstancesNodes(primary_only=True)
2352 def CheckPrereq(self):
2353 """Check prerequisites.
2355 This only checks the optional instance list against the existing names.
2358 if self.wanted_names is None:
2359 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2361 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2362 in self.wanted_names]
2364 def _EnsureChildSizes(self, disk):
2365 """Ensure children of the disk have the needed disk size.
2367 This is valid mainly for DRBD8 and fixes an issue where the
2368 children have smaller disk size.
2370 @param disk: an L{ganeti.objects.Disk} object
2373 if disk.dev_type == constants.LD_DRBD8:
2374 assert disk.children, "Empty children for DRBD8?"
2375 fchild = disk.children[0]
2376 mismatch = fchild.size < disk.size
2378 self.LogInfo("Child disk has size %d, parent %d, fixing",
2379 fchild.size, disk.size)
2380 fchild.size = disk.size
2382 # and we recurse on this child only, not on the metadev
2383 return self._EnsureChildSizes(fchild) or mismatch
2387 def Exec(self, feedback_fn):
2388 """Verify the size of cluster disks.
2391 # TODO: check child disks too
2392 # TODO: check differences in size between primary/secondary nodes
2394 for instance in self.wanted_instances:
2395 pnode = instance.primary_node
2396 if pnode not in per_node_disks:
2397 per_node_disks[pnode] = []
2398 for idx, disk in enumerate(instance.disks):
2399 per_node_disks[pnode].append((instance, idx, disk))
2402 for node, dskl in per_node_disks.items():
2403 newl = [v[2].Copy() for v in dskl]
2405 self.cfg.SetDiskID(dsk, node)
2406 result = self.rpc.call_blockdev_getsizes(node, newl)
2408 self.LogWarning("Failure in blockdev_getsizes call to node"
2409 " %s, ignoring", node)
2411 if len(result.data) != len(dskl):
2412 self.LogWarning("Invalid result from node %s, ignoring node results",
2415 for ((instance, idx, disk), size) in zip(dskl, result.data):
2417 self.LogWarning("Disk %d of instance %s did not return size"
2418 " information, ignoring", idx, instance.name)
2420 if not isinstance(size, (int, long)):
2421 self.LogWarning("Disk %d of instance %s did not return valid"
2422 " size information, ignoring", idx, instance.name)
2425 if size != disk.size:
2426 self.LogInfo("Disk %d of instance %s has mismatched size,"
2427 " correcting: recorded %d, actual %d", idx,
2428 instance.name, disk.size, size)
2430 self.cfg.Update(instance, feedback_fn)
2431 changed.append((instance.name, idx, size))
2432 if self._EnsureChildSizes(disk):
2433 self.cfg.Update(instance, feedback_fn)
2434 changed.append((instance.name, idx, disk.size))
2438 class LURenameCluster(LogicalUnit):
2439 """Rename the cluster.
2442 HPATH = "cluster-rename"
2443 HTYPE = constants.HTYPE_CLUSTER
2444 _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)]
2446 def BuildHooksEnv(self):
2451 "OP_TARGET": self.cfg.GetClusterName(),
2452 "NEW_NAME": self.op.name,
2454 mn = self.cfg.GetMasterNode()
2455 all_nodes = self.cfg.GetNodeList()
2456 return env, [mn], all_nodes
2458 def CheckPrereq(self):
2459 """Verify that the passed name is a valid one.
2462 hostname = utils.GetHostInfo(self.op.name)
2464 new_name = hostname.name
2465 self.ip = new_ip = hostname.ip
2466 old_name = self.cfg.GetClusterName()
2467 old_ip = self.cfg.GetMasterIP()
2468 if new_name == old_name and new_ip == old_ip:
2469 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2470 " cluster has changed",
2472 if new_ip != old_ip:
2473 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2474 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2475 " reachable on the network. Aborting." %
2476 new_ip, errors.ECODE_NOTUNIQUE)
2478 self.op.name = new_name
2480 def Exec(self, feedback_fn):
2481 """Rename the cluster.
2484 clustername = self.op.name
2487 # shutdown the master IP
2488 master = self.cfg.GetMasterNode()
2489 result = self.rpc.call_node_stop_master(master, False)
2490 result.Raise("Could not disable the master role")
2493 cluster = self.cfg.GetClusterInfo()
2494 cluster.cluster_name = clustername
2495 cluster.master_ip = ip
2496 self.cfg.Update(cluster, feedback_fn)
2498 # update the known hosts file
2499 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2500 node_list = self.cfg.GetNodeList()
2502 node_list.remove(master)
2505 result = self.rpc.call_upload_file(node_list,
2506 constants.SSH_KNOWN_HOSTS_FILE)
2507 for to_node, to_result in result.iteritems():
2508 msg = to_result.fail_msg
2510 msg = ("Copy of file %s to node %s failed: %s" %
2511 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2512 self.proc.LogWarning(msg)
2515 result = self.rpc.call_node_start_master(master, False, False)
2516 msg = result.fail_msg
2518 self.LogWarning("Could not re-enable the master role on"
2519 " the master, please restart manually: %s", msg)
2522 def _RecursiveCheckIfLVMBased(disk):
2523 """Check if the given disk or its children are lvm-based.
2525 @type disk: L{objects.Disk}
2526 @param disk: the disk to check
2528 @return: boolean indicating whether a LD_LV dev_type was found or not
2532 for chdisk in disk.children:
2533 if _RecursiveCheckIfLVMBased(chdisk):
2535 return disk.dev_type == constants.LD_LV
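# _RecursiveCheckIfLVMBased is used below by LUSetClusterParams.CheckPrereq to
# refuse disabling LVM storage while LVM-based instance disks still exist.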
2538 class LUSetClusterParams(LogicalUnit):
2539 """Change the parameters of the cluster.
2542 HPATH = "cluster-modify"
2543 HTYPE = constants.HTYPE_CLUSTER
2545 ("vg_name", None, _TMaybeString),
2546 ("enabled_hypervisors", None,
2547 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2548 ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2549 ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2550 ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2551 ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2552 ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)),
2553 ("uid_pool", None, _NoType),
2554 ("add_uids", None, _NoType),
2555 ("remove_uids", None, _NoType),
2556 ("maintain_node_health", None, _TMaybeBool),
2557 ("nicparams", None, _TOr(_TDict, _TNone)),
2561 def CheckArguments(self):
2565 if self.op.uid_pool:
2566 uidpool.CheckUidPool(self.op.uid_pool)
2568 if self.op.add_uids:
2569 uidpool.CheckUidPool(self.op.add_uids)
2571 if self.op.remove_uids:
2572 uidpool.CheckUidPool(self.op.remove_uids)
2574 def ExpandNames(self):
2575 # FIXME: in the future maybe other cluster params won't require checking on
2576 # all nodes to be modified.
2577 self.needed_locks = {
2578 locking.LEVEL_NODE: locking.ALL_SET,
2580 self.share_locks[locking.LEVEL_NODE] = 1
2582 def BuildHooksEnv(self):
2587 "OP_TARGET": self.cfg.GetClusterName(),
2588 "NEW_VG_NAME": self.op.vg_name,
2590 mn = self.cfg.GetMasterNode()
2591 return env, [mn], [mn]
2593 def CheckPrereq(self):
2594 """Check prerequisites.
2596 This checks whether the given params don't conflict and
2597 if the given volume group is valid.
2600 if self.op.vg_name is not None and not self.op.vg_name:
2601 instances = self.cfg.GetAllInstancesInfo().values()
2602 for inst in instances:
2603 for disk in inst.disks:
2604 if _RecursiveCheckIfLVMBased(disk):
2605 raise errors.OpPrereqError("Cannot disable lvm storage while"
2606 " lvm-based instances exist",
2609 node_list = self.acquired_locks[locking.LEVEL_NODE]
2611 # if vg_name not None, checks given volume group on all nodes
2613 vglist = self.rpc.call_vg_list(node_list)
2614 for node in node_list:
2615 msg = vglist[node].fail_msg
2617 # ignoring down node
2618 self.LogWarning("Error while gathering data on node %s"
2619 " (ignoring node): %s", node, msg)
2621 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2623 constants.MIN_VG_SIZE)
2625 raise errors.OpPrereqError("Error on node '%s': %s" %
2626 (node, vgstatus), errors.ECODE_ENVIRON)
2628 self.cluster = cluster = self.cfg.GetClusterInfo()
2629 # validate params changes
2630 if self.op.beparams:
2631 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2632 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2634 if self.op.nicparams:
2635 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2636 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2637 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2640 # check all instances for consistency
2641 for instance in self.cfg.GetAllInstancesInfo().values():
2642 for nic_idx, nic in enumerate(instance.nics):
2643 params_copy = copy.deepcopy(nic.nicparams)
2644 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2646 # check parameter syntax
2648 objects.NIC.CheckParameterSyntax(params_filled)
2649 except errors.ConfigurationError, err:
2650 nic_errors.append("Instance %s, nic/%d: %s" %
2651 (instance.name, nic_idx, err))
2653 # if we're moving instances to routed, check that they have an ip
2654 target_mode = params_filled[constants.NIC_MODE]
2655 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2656 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2657 (instance.name, nic_idx))
2659 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2660 "\n".join(nic_errors))
2662 # hypervisor list/parameters
2663 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2664 if self.op.hvparams:
2665 for hv_name, hv_dict in self.op.hvparams.items():
2666 if hv_name not in self.new_hvparams:
2667 self.new_hvparams[hv_name] = hv_dict
2669 self.new_hvparams[hv_name].update(hv_dict)
2671 # os hypervisor parameters
2672 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2674 for os_name, hvs in self.op.os_hvp.items():
2675 if os_name not in self.new_os_hvp:
2676 self.new_os_hvp[os_name] = hvs
2678 for hv_name, hv_dict in hvs.items():
2679 if hv_name not in self.new_os_hvp[os_name]:
2680 self.new_os_hvp[os_name][hv_name] = hv_dict
2682 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2685 self.new_osp = objects.FillDict(cluster.osparams, {})
2686 if self.op.osparams:
2687 for os_name, osp in self.op.osparams.items():
2688 if os_name not in self.new_osp:
2689 self.new_osp[os_name] = {}
2691 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2694 if not self.new_osp[os_name]:
2695 # we removed all parameters
2696 del self.new_osp[os_name]
2698 # check the parameter validity (remote check)
2699 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2700 os_name, self.new_osp[os_name])
2702 # changes to the hypervisor list
2703 if self.op.enabled_hypervisors is not None:
2704 self.hv_list = self.op.enabled_hypervisors
2705 for hv in self.hv_list:
2706 # if the hypervisor doesn't already exist in the cluster
2707 # hvparams, we initialize it to empty, and then (in both
2708 # cases) we make sure to fill the defaults, as we might not
2709 # have a complete defaults list if the hypervisor wasn't
2711 if hv not in new_hvp:
2713 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2714 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2716 self.hv_list = cluster.enabled_hypervisors
2718 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2719 # either the enabled list has changed, or the parameters have, validate
2720 for hv_name, hv_params in self.new_hvparams.items():
2721 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2722 (self.op.enabled_hypervisors and
2723 hv_name in self.op.enabled_hypervisors)):
2724 # either this is a new hypervisor, or its parameters have changed
2725 hv_class = hypervisor.GetHypervisor(hv_name)
2726 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2727 hv_class.CheckParameterSyntax(hv_params)
2728 _CheckHVParams(self, node_list, hv_name, hv_params)
2731 # no need to check any newly-enabled hypervisors, since the
2732 # defaults have already been checked in the above code-block
2733 for os_name, os_hvp in self.new_os_hvp.items():
2734 for hv_name, hv_params in os_hvp.items():
2735 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2736 # we need to fill in the new os_hvp on top of the actual hv_p
2737 cluster_defaults = self.new_hvparams.get(hv_name, {})
2738 new_osp = objects.FillDict(cluster_defaults, hv_params)
2739 hv_class = hypervisor.GetHypervisor(hv_name)
2740 hv_class.CheckParameterSyntax(new_osp)
2741 _CheckHVParams(self, node_list, hv_name, new_osp)
2744 def Exec(self, feedback_fn):
2745 """Change the parameters of the cluster.
2748 if self.op.vg_name is not None:
2749 new_volume = self.op.vg_name
2752 if new_volume != self.cfg.GetVGName():
2753 self.cfg.SetVGName(new_volume)
2755 feedback_fn("Cluster LVM configuration already in desired"
2756 " state, not changing")
2757 if self.op.hvparams:
2758 self.cluster.hvparams = self.new_hvparams
2760 self.cluster.os_hvp = self.new_os_hvp
2761 if self.op.enabled_hypervisors is not None:
2762 self.cluster.hvparams = self.new_hvparams
2763 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2764 if self.op.beparams:
2765 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2766 if self.op.nicparams:
2767 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2768 if self.op.osparams:
2769 self.cluster.osparams = self.new_osp
2771 if self.op.candidate_pool_size is not None:
2772 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2773 # we need to update the pool size here, otherwise the save will fail
2774 _AdjustCandidatePool(self, [])
2776 if self.op.maintain_node_health is not None:
2777 self.cluster.maintain_node_health = self.op.maintain_node_health
2779 if self.op.add_uids is not None:
2780 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2782 if self.op.remove_uids is not None:
2783 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2785 if self.op.uid_pool is not None:
2786 self.cluster.uid_pool = self.op.uid_pool
2788 self.cfg.Update(self.cluster, feedback_fn)
2791 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2792 """Distribute additional files which are part of the cluster configuration.
2794 ConfigWriter takes care of distributing the config and ssconf files, but
2795 there are more files which should be distributed to all nodes. This function
2796 makes sure those are copied.
2798 @param lu: calling logical unit
2799 @param additional_nodes: list of nodes not in the config to distribute to
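A usage sketch (variable names hypothetical; the additional_nodes form is the
one used when adding a node)::

  _RedistributeAncillaryFiles(lu, additional_nodes=[new_node.name])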
2802 # 1. Gather target nodes
2803 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2804 dist_nodes = lu.cfg.GetOnlineNodeList()
2805 if additional_nodes is not None:
2806 dist_nodes.extend(additional_nodes)
2807 if myself.name in dist_nodes:
2808 dist_nodes.remove(myself.name)
2810 # 2. Gather files to distribute
2811 dist_files = set([constants.ETC_HOSTS,
2812 constants.SSH_KNOWN_HOSTS_FILE,
2813 constants.RAPI_CERT_FILE,
2814 constants.RAPI_USERS_FILE,
2815 constants.CONFD_HMAC_KEY,
2816 constants.CLUSTER_DOMAIN_SECRET_FILE,
2819 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2820 for hv_name in enabled_hypervisors:
2821 hv_class = hypervisor.GetHypervisor(hv_name)
2822 dist_files.update(hv_class.GetAncillaryFiles())
2824 # 3. Perform the files upload
2825 for fname in dist_files:
2826 if os.path.exists(fname):
2827 result = lu.rpc.call_upload_file(dist_nodes, fname)
2828 for to_node, to_result in result.items():
2829 msg = to_result.fail_msg
2831 msg = ("Copy of file %s to node %s failed: %s" %
2832 (fname, to_node, msg))
2833 lu.proc.LogWarning(msg)
2836 class LURedistributeConfig(NoHooksLU):
2837 """Force the redistribution of cluster configuration.
2839 This is a very simple LU.
2844 def ExpandNames(self):
2845 self.needed_locks = {
2846 locking.LEVEL_NODE: locking.ALL_SET,
2848 self.share_locks[locking.LEVEL_NODE] = 1
2850 def Exec(self, feedback_fn):
2851 """Redistribute the configuration.
2854 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2855 _RedistributeAncillaryFiles(self)
2858 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2859 """Sleep and poll for an instance's disk to sync.
2862 if not instance.disks or disks is not None and not disks:
2865 disks = _ExpandCheckDisks(instance, disks)
2868 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2870 node = instance.primary_node
2873 lu.cfg.SetDiskID(dev, node)
2875 # TODO: Convert to utils.Retry
2878 degr_retries = 10 # in seconds, as we sleep 1 second each time
2882 cumul_degraded = False
2883 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2884 msg = rstats.fail_msg
2886 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2889 raise errors.RemoteError("Can't contact node %s for mirror data,"
2890 " aborting." % node)
2893 rstats = rstats.payload
2895 for i, mstat in enumerate(rstats):
2897 lu.LogWarning("Can't compute data for node %s/%s",
2898 node, disks[i].iv_name)
2901 cumul_degraded = (cumul_degraded or
2902 (mstat.is_degraded and mstat.sync_percent is None))
2903 if mstat.sync_percent is not None:
2905 if mstat.estimated_time is not None:
2906 rem_time = ("%s remaining (estimated)" %
2907 utils.FormatSeconds(mstat.estimated_time))
2908 max_time = mstat.estimated_time
2910 rem_time = "no time estimate"
2911 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2912 (disks[i].iv_name, mstat.sync_percent, rem_time))
2914 # if we're done but degraded, let's do a few small retries, to
2915 # make sure we see a stable and not transient situation; therefore
2916 # we force restart of the loop
2917 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2918 logging.info("Degraded disks found, %d retries left", degr_retries)
2926 time.sleep(min(60, max_time))
2929 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2930 return not cumul_degraded
2933 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2934 """Check that mirrors are not degraded.
2936 The ldisk parameter, if True, will change the test from the
2937 is_degraded attribute (which represents overall non-ok status for
2938 the device(s)) to the ldisk (representing the local storage status).
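A hedged usage sketch, not taken verbatim from this module: checking only the
local storage status of a device on a secondary node would look like::

  _CheckDiskConsistency(lu, dev, node, on_primary=False, ldisk=True)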
2941 lu.cfg.SetDiskID(dev, node)
2945 if on_primary or dev.AssembleOnSecondary():
2946 rstats = lu.rpc.call_blockdev_find(node, dev)
2947 msg = rstats.fail_msg
2949 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2951 elif not rstats.payload:
2952 lu.LogWarning("Can't find disk on node %s", node)
2956 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2958 result = result and not rstats.payload.is_degraded
2961 for child in dev.children:
2962 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2967 class LUDiagnoseOS(NoHooksLU):
2968 """Logical unit for OS diagnose/query.
2973 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
2976 _FIELDS_STATIC = utils.FieldSet()
2977 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2978 "parameters", "api_versions")
2980 def CheckArguments(self):
2982 raise errors.OpPrereqError("Selective OS query not supported",
2985 _CheckOutputFields(static=self._FIELDS_STATIC,
2986 dynamic=self._FIELDS_DYNAMIC,
2987 selected=self.op.output_fields)
2989 def ExpandNames(self):
2990 # Lock all nodes, in shared mode
2991 # Temporary removal of locks, should be reverted later
2992 # TODO: reintroduce locks when they are lighter-weight
2993 self.needed_locks = {}
2994 #self.share_locks[locking.LEVEL_NODE] = 1
2995 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2998 def _DiagnoseByOS(rlist):
2999 """Remaps a per-node return list into an a per-os per-node dictionary
3001 @param rlist: a map with node names as keys and OS objects as values
3004 @return: a dictionary with osnames as keys and as value another
3005 map, with nodes as keys and tuples of (path, status, diagnose,
3006 variants, parameters, api_versions) as values, eg::
3008 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3009 (/srv/..., False, "invalid api")],
3010 "node2": [(/srv/..., True, "", [], [])]}
3015 # we build here the list of nodes that didn't fail the RPC (at RPC
3016 # level), so that nodes with a non-responding node daemon don't
3017 # make all OSes invalid
3018 good_nodes = [node_name for node_name in rlist
3019 if not rlist[node_name].fail_msg]
3020 for node_name, nr in rlist.items():
3021 if nr.fail_msg or not nr.payload:
3023 for (name, path, status, diagnose, variants,
3024 params, api_versions) in nr.payload:
3025 if name not in all_os:
3026 # build a list of nodes for this os containing empty lists
3027 # for each node in node_list
3029 for nname in good_nodes:
3030 all_os[name][nname] = []
3031 # convert params from [name, help] to (name, help)
3032 params = [tuple(v) for v in params]
3033 all_os[name][node_name].append((path, status, diagnose,
3034 variants, params, api_versions))
3037 def Exec(self, feedback_fn):
3038 """Compute the list of OSes.
3041 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3042 node_data = self.rpc.call_os_diagnose(valid_nodes)
3043 pol = self._DiagnoseByOS(node_data)
3046 for os_name, os_data in pol.items():
3049 (variants, params, api_versions) = null_state = (set(), set(), set())
3050 for idx, osl in enumerate(os_data.values()):
3051 valid = bool(valid and osl and osl[0][1])
3053 (variants, params, api_versions) = null_state
3055 node_variants, node_params, node_api = osl[0][3:6]
3056 if idx == 0: # first entry
3057 variants = set(node_variants)
3058 params = set(node_params)
3059 api_versions = set(node_api)
3060 else: # keep consistency
3061 variants.intersection_update(node_variants)
3062 params.intersection_update(node_params)
3063 api_versions.intersection_update(node_api)
3065 for field in self.op.output_fields:
3068 elif field == "valid":
3070 elif field == "node_status":
3071 # this is just a copy of the dict
3073 for node_name, nos_list in os_data.items():
3074 val[node_name] = nos_list
3075 elif field == "variants":
3076 val = list(variants)
3077 elif field == "parameters":
3079 elif field == "api_versions":
3080 val = list(api_versions)
3082 raise errors.ParameterError(field)
3089 class LURemoveNode(LogicalUnit):
3090 """Logical unit for removing a node.
3093 HPATH = "node-remove"
3094 HTYPE = constants.HTYPE_NODE
3099 def BuildHooksEnv(self):
3102 This doesn't run on the target node in the pre phase as a failed
3103 node would then be impossible to remove.
3107 "OP_TARGET": self.op.node_name,
3108 "NODE_NAME": self.op.node_name,
3110 all_nodes = self.cfg.GetNodeList()
3112 all_nodes.remove(self.op.node_name)
3114 logging.warning("Node %s which is about to be removed not found"
3115 " in the all nodes list", self.op.node_name)
3116 return env, all_nodes, all_nodes
3118 def CheckPrereq(self):
3119 """Check prerequisites.
3122 - the node exists in the configuration
3123 - it does not have primary or secondary instances
3124 - it's not the master
3126 Any errors are signaled by raising errors.OpPrereqError.
3129 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3130 node = self.cfg.GetNodeInfo(self.op.node_name)
3131 assert node is not None
3133 instance_list = self.cfg.GetInstanceList()
3135 masternode = self.cfg.GetMasterNode()
3136 if node.name == masternode:
3137 raise errors.OpPrereqError("Node is the master node,"
3138 " you need to failover first.",
3141 for instance_name in instance_list:
3142 instance = self.cfg.GetInstanceInfo(instance_name)
3143 if node.name in instance.all_nodes:
3144 raise errors.OpPrereqError("Instance %s is still running on the node,"
3145 " please remove first." % instance_name,
3147 self.op.node_name = node.name
3150 def Exec(self, feedback_fn):
3151 """Removes the node from the cluster.
3155 logging.info("Stopping the node daemon and removing configs from node %s",
3158 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3160 # Promote nodes to master candidate as needed
3161 _AdjustCandidatePool(self, exceptions=[node.name])
3162 self.context.RemoveNode(node.name)
3164 # Run post hooks on the node before it's removed
3165 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3167 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3169 # pylint: disable-msg=W0702
3170 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3172 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3173 msg = result.fail_msg
3175 self.LogWarning("Errors encountered on the remote node while leaving"
3176 " the cluster: %s", msg)
3178 # Remove node from our /etc/hosts
3179 if self.cfg.GetClusterInfo().modify_etc_hosts:
3180 # FIXME: this should be done via an rpc call to node daemon
3181 utils.RemoveHostFromEtcHosts(node.name)
3182 _RedistributeAncillaryFiles(self)
3185 class LUQueryNodes(NoHooksLU):
3186 """Logical unit for querying nodes.
3189 # pylint: disable-msg=W0142
3192 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
3193 ("use_locking", False, _TBool),
3197 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3198 "master_candidate", "offline", "drained"]
3200 _FIELDS_DYNAMIC = utils.FieldSet(
3202 "mtotal", "mnode", "mfree",
3204 "ctotal", "cnodes", "csockets",
3207 _FIELDS_STATIC = utils.FieldSet(*[
3208 "pinst_cnt", "sinst_cnt",
3209 "pinst_list", "sinst_list",
3210 "pip", "sip", "tags",
3212 "role"] + _SIMPLE_FIELDS
3215 def CheckArguments(self):
3216 _CheckOutputFields(static=self._FIELDS_STATIC,
3217 dynamic=self._FIELDS_DYNAMIC,
3218 selected=self.op.output_fields)
3220 def ExpandNames(self):
3221 self.needed_locks = {}
3222 self.share_locks[locking.LEVEL_NODE] = 1
3225 self.wanted = _GetWantedNodes(self, self.op.names)
3227 self.wanted = locking.ALL_SET
3229 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3230 self.do_locking = self.do_node_query and self.op.use_locking
3232 # if we don't request only static fields, we need to lock the nodes
3233 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3235 def Exec(self, feedback_fn):
3236 """Computes the list of nodes and their attributes.
3239 all_info = self.cfg.GetAllNodesInfo()
3241 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3242 elif self.wanted != locking.ALL_SET:
3243 nodenames = self.wanted
3244 missing = set(nodenames).difference(all_info.keys())
3246 raise errors.OpExecError(
3247 "Some nodes were removed before retrieving their data: %s" % missing)
3249 nodenames = all_info.keys()
3251 nodenames = utils.NiceSort(nodenames)
3252 nodelist = [all_info[name] for name in nodenames]
3254 # begin data gathering
3256 if self.do_node_query:
3258 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3259 self.cfg.GetHypervisorType())
3260 for name in nodenames:
3261 nodeinfo = node_data[name]
3262 if not nodeinfo.fail_msg and nodeinfo.payload:
3263 nodeinfo = nodeinfo.payload
3264 fn = utils.TryConvert
3266 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3267 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3268 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3269 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3270 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3271 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3272 "bootid": nodeinfo.get('bootid', None),
3273 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3274 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3277 live_data[name] = {}
3279 live_data = dict.fromkeys(nodenames, {})
3281 node_to_primary = dict([(name, set()) for name in nodenames])
3282 node_to_secondary = dict([(name, set()) for name in nodenames])
3284 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3285 "sinst_cnt", "sinst_list"))
3286 if inst_fields & frozenset(self.op.output_fields):
3287 inst_data = self.cfg.GetAllInstancesInfo()
3289 for inst in inst_data.values():
3290 if inst.primary_node in node_to_primary:
3291 node_to_primary[inst.primary_node].add(inst.name)
3292 for secnode in inst.secondary_nodes:
3293 if secnode in node_to_secondary:
3294 node_to_secondary[secnode].add(inst.name)
3296 master_node = self.cfg.GetMasterNode()
3298 # end data gathering
3301 for node in nodelist:
3303 for field in self.op.output_fields:
3304 if field in self._SIMPLE_FIELDS:
3305 val = getattr(node, field)
3306 elif field == "pinst_list":
3307 val = list(node_to_primary[node.name])
3308 elif field == "sinst_list":
3309 val = list(node_to_secondary[node.name])
3310 elif field == "pinst_cnt":
3311 val = len(node_to_primary[node.name])
3312 elif field == "sinst_cnt":
3313 val = len(node_to_secondary[node.name])
3314 elif field == "pip":
3315 val = node.primary_ip
3316 elif field == "sip":
3317 val = node.secondary_ip
3318 elif field == "tags":
3319 val = list(node.GetTags())
3320 elif field == "master":
3321 val = node.name == master_node
3322 elif self._FIELDS_DYNAMIC.Matches(field):
3323 val = live_data[node.name].get(field, None)
3324 elif field == "role":
3325 if node.name == master_node:
3327 elif node.master_candidate:
3336 raise errors.ParameterError(field)
3337 node_output.append(val)
3338 output.append(node_output)
3343 class LUQueryNodeVolumes(NoHooksLU):
3344 """Logical unit for getting volumes on node(s).
3348 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3349 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3352 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3353 _FIELDS_STATIC = utils.FieldSet("node")
3355 def CheckArguments(self):
3356 _CheckOutputFields(static=self._FIELDS_STATIC,
3357 dynamic=self._FIELDS_DYNAMIC,
3358 selected=self.op.output_fields)
3360 def ExpandNames(self):
3361 self.needed_locks = {}
3362 self.share_locks[locking.LEVEL_NODE] = 1
3363 if not self.op.nodes:
3364 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3366 self.needed_locks[locking.LEVEL_NODE] = \
3367 _GetWantedNodes(self, self.op.nodes)
3369 def Exec(self, feedback_fn):
3370 """Computes the list of nodes and their attributes.
3373 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3374 volumes = self.rpc.call_node_volumes(nodenames)
3376 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3377 in self.cfg.GetInstanceList()]
3379 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3382 for node in nodenames:
3383 nresult = volumes[node]
3386 msg = nresult.fail_msg
3388 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3391 node_vols = nresult.payload[:]
3392 node_vols.sort(key=lambda vol: vol['dev'])
3394 for vol in node_vols:
3396 for field in self.op.output_fields:
3399 elif field == "phys":
3403 elif field == "name":
3405 elif field == "size":
3406 val = int(float(vol['size']))
3407 elif field == "instance":
3409 if node not in lv_by_node[inst]:
3411 if vol['name'] in lv_by_node[inst][node]:
3417 raise errors.ParameterError(field)
3418 node_output.append(str(val))
3420 output.append(node_output)
3425 class LUQueryNodeStorage(NoHooksLU):
3426 """Logical unit for getting information on storage units on node(s).
3429 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3431 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
3432 ("storage_type", _NoDefault, _CheckStorageType),
3433 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
3434 ("name", None, _TMaybeString),
3438 def CheckArguments(self):
3439 _CheckOutputFields(static=self._FIELDS_STATIC,
3440 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3441 selected=self.op.output_fields)
3443 def ExpandNames(self):
3444 self.needed_locks = {}
3445 self.share_locks[locking.LEVEL_NODE] = 1
3448 self.needed_locks[locking.LEVEL_NODE] = \
3449 _GetWantedNodes(self, self.op.nodes)
3451 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3453 def Exec(self, feedback_fn):
3454 """Computes the list of nodes and their attributes.
3457 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3459 # Always get name to sort by
3460 if constants.SF_NAME in self.op.output_fields:
3461 fields = self.op.output_fields[:]
3463 fields = [constants.SF_NAME] + self.op.output_fields
3465 # Never ask for node or type as it's only known to the LU
3466 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3467 while extra in fields:
3468 fields.remove(extra)
3470 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3471 name_idx = field_idx[constants.SF_NAME]
3473 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3474 data = self.rpc.call_storage_list(self.nodes,
3475 self.op.storage_type, st_args,
3476 self.op.name, fields)
3480 for node in utils.NiceSort(self.nodes):
3481 nresult = data[node]
3485 msg = nresult.fail_msg
3487 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3490 rows = dict([(row[name_idx], row) for row in nresult.payload])
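# rows maps each storage unit's name to its full result row, so the loop
# below can emit the output sorted by unit name via utils.NiceSort.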
3492 for name in utils.NiceSort(rows.keys()):
3497 for field in self.op.output_fields:
3498 if field == constants.SF_NODE:
3500 elif field == constants.SF_TYPE:
3501 val = self.op.storage_type
3502 elif field in field_idx:
3503 val = row[field_idx[field]]
3505 raise errors.ParameterError(field)
3514 class LUModifyNodeStorage(NoHooksLU):
3515 """Logical unit for modifying a storage volume on a node.
3520 ("storage_type", _NoDefault, _CheckStorageType),
3521 ("name", _NoDefault, _TNonEmptyString),
3522 ("changes", _NoDefault, _TDict),
3526 def CheckArguments(self):
3527 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3529 storage_type = self.op.storage_type
3532 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3534 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3535 " modified" % storage_type,
3538 diff = set(self.op.changes.keys()) - modifiable
3540 raise errors.OpPrereqError("The following fields can not be modified for"
3541 " storage units of type '%s': %r" %
3542 (storage_type, list(diff)),
3545 def ExpandNames(self):
3546 self.needed_locks = {
3547 locking.LEVEL_NODE: self.op.node_name,
3550 def Exec(self, feedback_fn):
3551 """Computes the list of nodes and their attributes.
3554 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3555 result = self.rpc.call_storage_modify(self.op.node_name,
3556 self.op.storage_type, st_args,
3557 self.op.name, self.op.changes)
3558 result.Raise("Failed to modify storage unit '%s' on %s" %
3559 (self.op.name, self.op.node_name))
3562 class LUAddNode(LogicalUnit):
3563 """Logical unit for adding node to the cluster.
3567 HTYPE = constants.HTYPE_NODE
3570 ("primary_ip", None, _NoType),
3571 ("secondary_ip", None, _TMaybeString),
3572 ("readd", False, _TBool),
3575 def CheckArguments(self):
3576 # validate/normalize the node name
3577 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3579 def BuildHooksEnv(self):
3582 This will run on all nodes before, and on all nodes + the new node after.
3586 "OP_TARGET": self.op.node_name,
3587 "NODE_NAME": self.op.node_name,
3588 "NODE_PIP": self.op.primary_ip,
3589 "NODE_SIP": self.op.secondary_ip,
3591 nodes_0 = self.cfg.GetNodeList()
3592 nodes_1 = nodes_0 + [self.op.node_name, ]
3593 return env, nodes_0, nodes_1
3595 def CheckPrereq(self):
3596 """Check prerequisites.
3599 - the new node is not already in the config
3601 - its parameters (single/dual homed) matches the cluster
3603 Any errors are signaled by raising errors.OpPrereqError.
3606 node_name = self.op.node_name
3609 dns_data = utils.GetHostInfo(node_name)
3611 node = dns_data.name
3612 primary_ip = self.op.primary_ip = dns_data.ip
3613 if self.op.secondary_ip is None:
3614 self.op.secondary_ip = primary_ip
3615 if not utils.IsValidIP4(self.op.secondary_ip):
3616 raise errors.OpPrereqError("Invalid secondary IP given",
3618 secondary_ip = self.op.secondary_ip
3620 node_list = cfg.GetNodeList()
3621 if not self.op.readd and node in node_list:
3622 raise errors.OpPrereqError("Node %s is already in the configuration" %
3623 node, errors.ECODE_EXISTS)
3624 elif self.op.readd and node not in node_list:
3625 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3628 self.changed_primary_ip = False
3630 for existing_node_name in node_list:
3631 existing_node = cfg.GetNodeInfo(existing_node_name)
3633 if self.op.readd and node == existing_node_name:
3634 if existing_node.secondary_ip != secondary_ip:
3635 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3636 " address configuration as before",
3638 if existing_node.primary_ip != primary_ip:
3639 self.changed_primary_ip = True
3643 if (existing_node.primary_ip == primary_ip or
3644 existing_node.secondary_ip == primary_ip or
3645 existing_node.primary_ip == secondary_ip or
3646 existing_node.secondary_ip == secondary_ip):
3647 raise errors.OpPrereqError("New node ip address(es) conflict with"
3648 " existing node %s" % existing_node.name,
3649 errors.ECODE_NOTUNIQUE)
3651 # check that the type of the node (single versus dual homed) is the
3652 # same as for the master
3653 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3654 master_singlehomed = myself.secondary_ip == myself.primary_ip
3655 newbie_singlehomed = secondary_ip == primary_ip
3656 if master_singlehomed != newbie_singlehomed:
3657 if master_singlehomed:
3658 raise errors.OpPrereqError("The master has no private ip but the"
3659 " new node has one",
3662 raise errors.OpPrereqError("The master has a private ip but the"
3663 " new node doesn't have one",
3666 # checks reachability
3667 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3668 raise errors.OpPrereqError("Node not reachable by ping",
3669 errors.ECODE_ENVIRON)
3671 if not newbie_singlehomed:
3672 # check reachability from my secondary ip to newbie's secondary ip
3673 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3674 source=myself.secondary_ip):
3675 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3676 " based ping to noded port",
3677 errors.ECODE_ENVIRON)
3684 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3687 self.new_node = self.cfg.GetNodeInfo(node)
3688 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3690 self.new_node = objects.Node(name=node,
3691 primary_ip=primary_ip,
3692 secondary_ip=secondary_ip,
3693 master_candidate=self.master_candidate,
3694 offline=False, drained=False)
3696 def Exec(self, feedback_fn):
3697 """Adds the new node to the cluster.
3700 new_node = self.new_node
3701 node = new_node.name
3703 # for re-adds, reset the offline/drained/master-candidate flags;
3704 # we need to reset here, otherwise offline would prevent RPC calls
3705 # later in the procedure; this also means that if the re-add
3706 # fails, we are left with a non-offlined, broken node
3708 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3709 self.LogInfo("Readding a node, the offline/drained flags were reset")
3710 # if we demote the node, we do cleanup later in the procedure
3711 new_node.master_candidate = self.master_candidate
3712 if self.changed_primary_ip:
3713 new_node.primary_ip = self.op.primary_ip
3715 # notify the user about any possible mc promotion
3716 if new_node.master_candidate:
3717 self.LogInfo("Node will be a master candidate")
3719 # check connectivity
3720 result = self.rpc.call_version([node])[node]
3721 result.Raise("Can't get version information from node %s" % node)
3722 if constants.PROTOCOL_VERSION == result.payload:
3723 logging.info("Communication to node %s fine, sw version %s match",
3724 node, result.payload)
3726 raise errors.OpExecError("Version mismatch master version %s,"
3727 " node version %s" %
3728 (constants.PROTOCOL_VERSION, result.payload))
3731 if self.cfg.GetClusterInfo().modify_ssh_setup:
3732 logging.info("Copy ssh key to node %s", node)
3733 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3735 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3736 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3740 keyarray.append(utils.ReadFile(i))
3742 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3743 keyarray[2], keyarray[3], keyarray[4],
3745 result.Raise("Cannot transfer ssh keys to the new node")
3747 # Add node to our /etc/hosts, and add key to known_hosts
3748 if self.cfg.GetClusterInfo().modify_etc_hosts:
3749 # FIXME: this should be done via an rpc call to node daemon
3750 utils.AddHostToEtcHosts(new_node.name)
3752 if new_node.secondary_ip != new_node.primary_ip:
3753 result = self.rpc.call_node_has_ip_address(new_node.name,
3754 new_node.secondary_ip)
3755 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3756 prereq=True, ecode=errors.ECODE_ENVIRON)
3757 if not result.payload:
3758 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3759 " you gave (%s). Please fix and re-run this"
3760 " command." % new_node.secondary_ip)
3762 node_verify_list = [self.cfg.GetMasterNode()]
3763 node_verify_param = {
3764 constants.NV_NODELIST: [node],
3765 # TODO: do a node-net-test as well?
3768 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3769 self.cfg.GetClusterName())
3770 for verifier in node_verify_list:
3771 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3772 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3774 for failed in nl_payload:
3775 feedback_fn("ssh/hostname verification failed"
3776 " (checking from %s): %s" %
3777 (verifier, nl_payload[failed]))
3778 raise errors.OpExecError("ssh/hostname verification failed.")
3781 _RedistributeAncillaryFiles(self)
3782 self.context.ReaddNode(new_node)
3783 # make sure we redistribute the config
3784 self.cfg.Update(new_node, feedback_fn)
3785 # and make sure the new node will not have old files around
3786 if not new_node.master_candidate:
3787 result = self.rpc.call_node_demote_from_mc(new_node.name)
3788 msg = result.fail_msg
3790 self.LogWarning("Node failed to demote itself from master"
3791 " candidate status: %s" % msg)
3793 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3794 self.context.AddNode(new_node, self.proc.GetECId())
3797 class LUSetNodeParams(LogicalUnit):
3798 """Modifies the parameters of a node.
3801 HPATH = "node-modify"
3802 HTYPE = constants.HTYPE_NODE
3805 ("master_candidate", None, _TMaybeBool),
3806 ("offline", None, _TMaybeBool),
3807 ("drained", None, _TMaybeBool),
3808 ("auto_promote", False, _TBool),
3813 def CheckArguments(self):
3814 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3815 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3816 if all_mods.count(None) == 3:
3817 raise errors.OpPrereqError("Please pass at least one modification",
3819 if all_mods.count(True) > 1:
3820 raise errors.OpPrereqError("Can't set the node into more than one"
3821 " state at the same time",
3824 # Boolean value that tells us whether we're offlining or draining the node
3825 self.offline_or_drain = (self.op.offline == True or
3826 self.op.drained == True)
3827 self.deoffline_or_drain = (self.op.offline == False or
3828 self.op.drained == False)
3829 self.might_demote = (self.op.master_candidate == False or
3830 self.offline_or_drain)
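# Worked example: passing offline=True alone makes offline_or_drain and
# might_demote True while deoffline_or_drain stays False; offline=False alone
# only sets deoffline_or_drain; master_candidate=False alone only sets
# might_demote.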
3832 self.lock_all = self.op.auto_promote and self.might_demote
3835 def ExpandNames(self):
3837 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3839 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3841 def BuildHooksEnv(self):
3844 This runs on the master node.
3848 "OP_TARGET": self.op.node_name,
3849 "MASTER_CANDIDATE": str(self.op.master_candidate),
3850 "OFFLINE": str(self.op.offline),
3851 "DRAINED": str(self.op.drained),
3853 nl = [self.cfg.GetMasterNode(),
3857 def CheckPrereq(self):
3858 """Check prerequisites.
3860 This checks that the requested node flag changes are valid.
3863 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3865 if (self.op.master_candidate is not None or
3866 self.op.drained is not None or
3867 self.op.offline is not None):
3868 # we can't change the master's node flags
3869 if self.op.node_name == self.cfg.GetMasterNode():
3870 raise errors.OpPrereqError("The master role can be changed"
3871 " only via masterfailover",
3875 if node.master_candidate and self.might_demote and not self.lock_all:
3876 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3877 # check if after removing the current node, we're missing master
3879 (mc_remaining, mc_should, _) = \
3880 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3881 if mc_remaining < mc_should:
3882 raise errors.OpPrereqError("Not enough master candidates, please"
3883 " pass auto_promote to allow promotion",
3886 if (self.op.master_candidate == True and
3887 ((node.offline and not self.op.offline == False) or
3888 (node.drained and not self.op.drained == False))):
3889 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3890 " to master_candidate" % node.name,
3893 # If we're being deofflined/drained, we'll MC ourself if needed
3894 if (self.deoffline_or_drain and not self.offline_or_drain and not
3895 self.op.master_candidate == True and not node.master_candidate):
3896 self.op.master_candidate = _DecideSelfPromotion(self)
3897 if self.op.master_candidate:
3898 self.LogInfo("Autopromoting node to master candidate")
3902 def Exec(self, feedback_fn):
3911 if self.op.offline is not None:
3912 node.offline = self.op.offline
3913 result.append(("offline", str(self.op.offline)))
3914 if self.op.offline == True:
3915 if node.master_candidate:
3916 node.master_candidate = False
3918 result.append(("master_candidate", "auto-demotion due to offline"))
3920 node.drained = False
3921 result.append(("drained", "clear drained status due to offline"))
3923 if self.op.master_candidate is not None:
3924 node.master_candidate = self.op.master_candidate
3926 result.append(("master_candidate", str(self.op.master_candidate)))
3927 if self.op.master_candidate == False:
3928 rrc = self.rpc.call_node_demote_from_mc(node.name)
3931 self.LogWarning("Node failed to demote itself: %s" % msg)
3933 if self.op.drained is not None:
3934 node.drained = self.op.drained
3935 result.append(("drained", str(self.op.drained)))
3936 if self.op.drained == True:
3937 if node.master_candidate:
3938 node.master_candidate = False
3940 result.append(("master_candidate", "auto-demotion due to drain"))
3941 rrc = self.rpc.call_node_demote_from_mc(node.name)
3944 self.LogWarning("Node failed to demote itself: %s" % msg)
3946 node.offline = False
3947 result.append(("offline", "clear offline status due to drain"))
3949 # we locked all nodes, we adjust the CP before updating this node
3950 if self.lock_all:
3951 _AdjustCandidatePool(self, [node.name])
3953 # this will trigger configuration file update, if needed
3954 self.cfg.Update(node, feedback_fn)
3956 # this will trigger job queue propagation or cleanup
3958 self.context.ReaddNode(node)
3963 class LUPowercycleNode(NoHooksLU):
3964 """Powercycles a node.
3973 def CheckArguments(self):
3974 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3975 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3976 raise errors.OpPrereqError("The node is the master and the force"
3977 " parameter was not set",
3980 def ExpandNames(self):
3981 """Locking for PowercycleNode.
3983 This is a last-resort option and shouldn't block on other
3984 jobs. Therefore, we grab no locks.
3987 self.needed_locks = {}
3989 def Exec(self, feedback_fn):
3993 result = self.rpc.call_node_powercycle(self.op.node_name,
3994 self.cfg.GetHypervisorType())
3995 result.Raise("Failed to schedule the reboot")
3996 return result.payload
3999 class LUQueryClusterInfo(NoHooksLU):
4000 """Query cluster configuration.
4005 def ExpandNames(self):
4006 self.needed_locks = {}
4008 def Exec(self, feedback_fn):
4009 """Return cluster config.
4012 cluster = self.cfg.GetClusterInfo()
4015 # Filter just for enabled hypervisors
4016 for os_name, hv_dict in cluster.os_hvp.items():
4017 os_hvp[os_name] = {}
4018 for hv_name, hv_params in hv_dict.items():
4019 if hv_name in cluster.enabled_hypervisors:
4020 os_hvp[os_name][hv_name] = hv_params
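# Illustrative result (a sketch with made-up names): if only "xen-pvm" is
# enabled, an os_hvp entry such as
# {"debian-image": {"xen-pvm": {...}, "kvm": {...}}}
# is reduced to {"debian-image": {"xen-pvm": {...}}}.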
4023 "software_version": constants.RELEASE_VERSION,
4024 "protocol_version": constants.PROTOCOL_VERSION,
4025 "config_version": constants.CONFIG_VERSION,
4026 "os_api_version": max(constants.OS_API_VERSIONS),
4027 "export_version": constants.EXPORT_VERSION,
4028 "architecture": (platform.architecture()[0], platform.machine()),
4029 "name": cluster.cluster_name,
4030 "master": cluster.master_node,
4031 "default_hypervisor": cluster.enabled_hypervisors[0],
4032 "enabled_hypervisors": cluster.enabled_hypervisors,
4033 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4034 for hypervisor_name in cluster.enabled_hypervisors]),
4036 "beparams": cluster.beparams,
4037 "osparams": cluster.osparams,
4038 "nicparams": cluster.nicparams,
4039 "candidate_pool_size": cluster.candidate_pool_size,
4040 "master_netdev": cluster.master_netdev,
4041 "volume_group_name": cluster.volume_group_name,
4042 "file_storage_dir": cluster.file_storage_dir,
4043 "maintain_node_health": cluster.maintain_node_health,
4044 "ctime": cluster.ctime,
4045 "mtime": cluster.mtime,
4046 "uuid": cluster.uuid,
4047 "tags": list(cluster.GetTags()),
4048 "uid_pool": cluster.uid_pool,
4054 class LUQueryConfigValues(NoHooksLU):
4055 """Return configuration values.
4058 _OP_PARAMS = [_POutputFields]
4060 _FIELDS_DYNAMIC = utils.FieldSet()
4061 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4064 def CheckArguments(self):
4065 _CheckOutputFields(static=self._FIELDS_STATIC,
4066 dynamic=self._FIELDS_DYNAMIC,
4067 selected=self.op.output_fields)
4069 def ExpandNames(self):
4070 self.needed_locks = {}
4072 def Exec(self, feedback_fn):
4073 """Collect and return the requested configuration values.
4077 for field in self.op.output_fields:
4078 if field == "cluster_name":
4079 entry = self.cfg.GetClusterName()
4080 elif field == "master_node":
4081 entry = self.cfg.GetMasterNode()
4082 elif field == "drain_flag":
4083 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4084 elif field == "watcher_pause":
4085 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4087 raise errors.ParameterError(field)
4088 values.append(entry)
4092 class LUActivateInstanceDisks(NoHooksLU):
4093 """Bring up an instance's disks.
4098 ("ignore_size", False, _TBool),
4102 def ExpandNames(self):
4103 self._ExpandAndLockInstance()
4104 self.needed_locks[locking.LEVEL_NODE] = []
4105 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4107 def DeclareLocks(self, level):
4108 if level == locking.LEVEL_NODE:
4109 self._LockInstancesNodes()
4111 def CheckPrereq(self):
4112 """Check prerequisites.
4114 This checks that the instance is in the cluster.
4117 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4118 assert self.instance is not None, \
4119 "Cannot retrieve locked instance %s" % self.op.instance_name
4120 _CheckNodeOnline(self, self.instance.primary_node)
4122 def Exec(self, feedback_fn):
4123 """Activate the disks.
4126 disks_ok, disks_info = \
4127 _AssembleInstanceDisks(self, self.instance,
4128 ignore_size=self.op.ignore_size)
4130 raise errors.OpExecError("Cannot activate block devices")
4135 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4137 """Prepare the block devices for an instance.
4139 This sets up the block devices on all nodes.
4141 @type lu: L{LogicalUnit}
4142 @param lu: the logical unit on whose behalf we execute
4143 @type instance: L{objects.Instance}
4144 @param instance: the instance for whose disks we assemble
4145 @type disks: list of L{objects.Disk} or None
4146 @param disks: which disks to assemble (or all, if None)
4147 @type ignore_secondaries: boolean
4148 @param ignore_secondaries: if true, errors on secondary nodes
4149 won't result in an error return from the function
4150 @type ignore_size: boolean
4151 @param ignore_size: if true, the current known size of the disk
4152 will not be used during the disk activation, useful for cases
4153 when the size is wrong
4154 @return: a tuple of (disks_ok, device_info); device_info is a list of
4155 (host, instance_visible_name, node_visible_name) tuples
4156 with the mapping from node devices to instance devices
4161 iname = instance.name
4162 disks = _ExpandCheckDisks(instance, disks)
4164 # With the two-pass mechanism we try to reduce the window of
4165 # opportunity for the race condition of switching DRBD to primary
4166 # before handshaking occurred, but we do not eliminate it
4168 # The proper fix would be to wait (with some limits) until the
4169 # connection has been made and drbd transitions from WFConnection
4170 # into any other network-connected state (Connected, SyncTarget,
4171 # SyncSource, etc.)
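# For a DRBD disk of a two-node instance this means: pass 1 assembles the
# device with is_primary=False on both the primary and the secondary node,
# and only pass 2 re-assembles it with is_primary=True on the primary node,
# giving the two peers a chance to connect in between.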
4173 # 1st pass, assemble on all nodes in secondary mode
4174 for inst_disk in disks:
4175 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4177 node_disk = node_disk.Copy()
4178 node_disk.UnsetSize()
4179 lu.cfg.SetDiskID(node_disk, node)
4180 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4181 msg = result.fail_msg
4183 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4184 " (is_primary=False, pass=1): %s",
4185 inst_disk.iv_name, node, msg)
4186 if not ignore_secondaries:
4189 # FIXME: race condition on drbd migration to primary
4191 # 2nd pass, do only the primary node
4192 for inst_disk in disks:
4195 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4196 if node != instance.primary_node:
4199 node_disk = node_disk.Copy()
4200 node_disk.UnsetSize()
4201 lu.cfg.SetDiskID(node_disk, node)
4202 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4203 msg = result.fail_msg
4205 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4206 " (is_primary=True, pass=2): %s",
4207 inst_disk.iv_name, node, msg)
4210 dev_path = result.payload
4212 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4214 # leave the disks configured for the primary node
4215 # this is a workaround that would be fixed better by
4216 # improving the logical/physical id handling
4218 lu.cfg.SetDiskID(disk, instance.primary_node)
4220 return disks_ok, device_info
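# Illustrative usage (a sketch mirroring LUActivateInstanceDisks above):
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, self.instance,
#                                                  ignore_size=self.op.ignore_size)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   # each device_info entry is a (node, iv_name, device_path) tuple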
4223 def _StartInstanceDisks(lu, instance, force):
4224 """Start the disks of an instance.
4227 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4228 ignore_secondaries=force)
4230 _ShutdownInstanceDisks(lu, instance)
4231 if force is not None and not force:
4232 lu.proc.LogWarning("", hint="If the message above refers to a"
4234 " you can retry the operation using '--force'.")
4235 raise errors.OpExecError("Disk consistency error")
4238 class LUDeactivateInstanceDisks(NoHooksLU):
4239 """Shutdown an instance's disks.
4247 def ExpandNames(self):
4248 self._ExpandAndLockInstance()
4249 self.needed_locks[locking.LEVEL_NODE] = []
4250 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4252 def DeclareLocks(self, level):
4253 if level == locking.LEVEL_NODE:
4254 self._LockInstancesNodes()
4256 def CheckPrereq(self):
4257 """Check prerequisites.
4259 This checks that the instance is in the cluster.
4262 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4263 assert self.instance is not None, \
4264 "Cannot retrieve locked instance %s" % self.op.instance_name
4266 def Exec(self, feedback_fn):
4267 """Deactivate the disks
4270 instance = self.instance
4271 _SafeShutdownInstanceDisks(self, instance)
4274 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4275 """Shutdown block devices of an instance.
4277 This function checks that the instance is not running, before calling
4278 _ShutdownInstanceDisks.
4281 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4282 _ShutdownInstanceDisks(lu, instance, disks=disks)
4285 def _ExpandCheckDisks(instance, disks):
4286 """Return the instance disks selected by the disks list
4288 @type disks: list of L{objects.Disk} or None
4289 @param disks: selected disks
4290 @rtype: list of L{objects.Disk}
4291 @return: selected instance disks to act on
4295 return instance.disks
4297 if not set(disks).issubset(instance.disks):
4298 raise errors.ProgrammerError("Can only act on disks belonging to the"
4303 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4304 """Shutdown block devices of an instance.
4306 This does the shutdown on all nodes of the instance.
4308 If the ignore_primary is false, errors on the primary node are
4309 treated as fatal and the shutdown is reported as failed.
4313 disks = _ExpandCheckDisks(instance, disks)
4316 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4317 lu.cfg.SetDiskID(top_disk, node)
4318 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4319 msg = result.fail_msg
4321 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4322 disk.iv_name, node, msg)
4323 if not ignore_primary or node != instance.primary_node:
4328 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4329 """Checks if a node has enough free memory.
4331 This function checks if a given node has the needed amount of free
4332 memory. In case the node has less memory or we cannot get the
4333 information from the node, this function raises an OpPrereqError
4334 exception.
4336 @type lu: C{LogicalUnit}
4337 @param lu: a logical unit from which we get configuration data
4339 @param node: the node to check
4340 @type reason: C{str}
4341 @param reason: string to use in the error message
4342 @type requested: C{int}
4343 @param requested: the amount of memory in MiB to check for
4344 @type hypervisor_name: C{str}
4345 @param hypervisor_name: the hypervisor to ask for memory stats
4346 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4347 we cannot check the node
4350 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4351 nodeinfo[node].Raise("Can't get data from node %s" % node,
4352 prereq=True, ecode=errors.ECODE_ENVIRON)
4353 free_mem = nodeinfo[node].payload.get('memory_free', None)
4354 if not isinstance(free_mem, int):
4355 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4356 " was '%s'" % (node, free_mem),
4357 errors.ECODE_ENVIRON)
4358 if requested > free_mem:
4359 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4360 " needed %s MiB, available %s MiB" %
4361 (node, reason, requested, free_mem),
4362 errors.ECODE_NORES)
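# Illustrative call (a sketch based on how LUStartupInstance uses this
# helper below): verify that the primary node can hold the instance's
# memory before starting it:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)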
4365 def _CheckNodesFreeDisk(lu, nodenames, requested):
4366 """Checks if nodes have enough free disk space in the default VG.
4368 This function checks if all given nodes have the needed amount of
4369 free disk. In case any node has less disk or we cannot get the
4370 information from the node, this function raises an OpPrereqError
4371 exception.
4373 @type lu: C{LogicalUnit}
4374 @param lu: a logical unit from which we get configuration data
4375 @type nodenames: C{list}
4376 @param nodenames: the list of node names to check
4377 @type requested: C{int}
4378 @param requested: the amount of disk in MiB to check for
4379 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4380 we cannot check the node
4383 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4384 lu.cfg.GetHypervisorType())
4385 for node in nodenames:
4386 info = nodeinfo[node]
4387 info.Raise("Cannot get current information from node %s" % node,
4388 prereq=True, ecode=errors.ECODE_ENVIRON)
4389 vg_free = info.payload.get("vg_free", None)
4390 if not isinstance(vg_free, int):
4391 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4392 " result was '%s'" % (node, vg_free),
4393 errors.ECODE_ENVIRON)
4394 if requested > vg_free:
4395 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4396 " required %d MiB, available %d MiB" %
4397 (node, requested, vg_free),
4398 errors.ECODE_NORES)
4401 class LUStartupInstance(LogicalUnit):
4402 """Starts an instance.
4405 HPATH = "instance-start"
4406 HTYPE = constants.HTYPE_INSTANCE
4410 ("hvparams", _EmptyDict, _TDict),
4411 ("beparams", _EmptyDict, _TDict),
4415 def CheckArguments(self):
4417 if self.op.beparams:
4418 # fill the beparams dict
4419 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4421 def ExpandNames(self):
4422 self._ExpandAndLockInstance()
4424 def BuildHooksEnv(self):
4427 This runs on master, primary and secondary nodes of the instance.
4431 "FORCE": self.op.force,
4433 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4434 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4437 def CheckPrereq(self):
4438 """Check prerequisites.
4440 This checks that the instance is in the cluster.
4443 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4444 assert self.instance is not None, \
4445 "Cannot retrieve locked instance %s" % self.op.instance_name
4448 if self.op.hvparams:
4449 # check hypervisor parameter syntax (locally)
4450 cluster = self.cfg.GetClusterInfo()
4451 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4452 filled_hvp = cluster.FillHV(instance)
4453 filled_hvp.update(self.op.hvparams)
4454 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4455 hv_type.CheckParameterSyntax(filled_hvp)
4456 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
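# Illustrative merge (a sketch; the parameter name is an example only): if
# the cluster-level hvparams contain {"kernel_path": "/boot/vmlinuz"} and
# the opcode passes hvparams={"kernel_path": "/boot/vmlinuz-test"}, the
# opcode value wins in filled_hvp, and that merged dict is what was
# syntax-checked above.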
4458 _CheckNodeOnline(self, instance.primary_node)
4460 bep = self.cfg.GetClusterInfo().FillBE(instance)
4461 # check bridges existence
4462 _CheckInstanceBridgesExist(self, instance)
4464 remote_info = self.rpc.call_instance_info(instance.primary_node,
4466 instance.hypervisor)
4467 remote_info.Raise("Error checking node %s" % instance.primary_node,
4468 prereq=True, ecode=errors.ECODE_ENVIRON)
4469 if not remote_info.payload: # not running already
4470 _CheckNodeFreeMemory(self, instance.primary_node,
4471 "starting instance %s" % instance.name,
4472 bep[constants.BE_MEMORY], instance.hypervisor)
4474 def Exec(self, feedback_fn):
4475 """Start the instance.
4478 instance = self.instance
4479 force = self.op.force
4481 self.cfg.MarkInstanceUp(instance.name)
4483 node_current = instance.primary_node
4485 _StartInstanceDisks(self, instance, force)
4487 result = self.rpc.call_instance_start(node_current, instance,
4488 self.op.hvparams, self.op.beparams)
4489 msg = result.fail_msg
4491 _ShutdownInstanceDisks(self, instance)
4492 raise errors.OpExecError("Could not start instance: %s" % msg)
4495 class LURebootInstance(LogicalUnit):
4496 """Reboot an instance.
4499 HPATH = "instance-reboot"
4500 HTYPE = constants.HTYPE_INSTANCE
4503 ("ignore_secondaries", False, _TBool),
4504 ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)),
4509 def ExpandNames(self):
4510 self._ExpandAndLockInstance()
4512 def BuildHooksEnv(self):
4515 This runs on master, primary and secondary nodes of the instance.
4519 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4520 "REBOOT_TYPE": self.op.reboot_type,
4521 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4523 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4524 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4527 def CheckPrereq(self):
4528 """Check prerequisites.
4530 This checks that the instance is in the cluster.
4533 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4534 assert self.instance is not None, \
4535 "Cannot retrieve locked instance %s" % self.op.instance_name
4537 _CheckNodeOnline(self, instance.primary_node)
4539 # check bridges existence
4540 _CheckInstanceBridgesExist(self, instance)
4542 def Exec(self, feedback_fn):
4543 """Reboot the instance.
4546 instance = self.instance
4547 ignore_secondaries = self.op.ignore_secondaries
4548 reboot_type = self.op.reboot_type
4550 node_current = instance.primary_node
4552 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4553 constants.INSTANCE_REBOOT_HARD]:
4554 for disk in instance.disks:
4555 self.cfg.SetDiskID(disk, node_current)
4556 result = self.rpc.call_instance_reboot(node_current, instance,
4558 self.op.shutdown_timeout)
4559 result.Raise("Could not reboot instance")
4561 result = self.rpc.call_instance_shutdown(node_current, instance,
4562 self.op.shutdown_timeout)
4563 result.Raise("Could not shutdown instance for full reboot")
4564 _ShutdownInstanceDisks(self, instance)
4565 _StartInstanceDisks(self, instance, ignore_secondaries)
4566 result = self.rpc.call_instance_start(node_current, instance, None, None)
4567 msg = result.fail_msg
4569 _ShutdownInstanceDisks(self, instance)
4570 raise errors.OpExecError("Could not start instance for"
4571 " full reboot: %s" % msg)
4573 self.cfg.MarkInstanceUp(instance.name)
4576 class LUShutdownInstance(LogicalUnit):
4577 """Shutdown an instance.
4580 HPATH = "instance-stop"
4581 HTYPE = constants.HTYPE_INSTANCE
4584 ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt),
4588 def ExpandNames(self):
4589 self._ExpandAndLockInstance()
4591 def BuildHooksEnv(self):
4594 This runs on master, primary and secondary nodes of the instance.
4597 env = _BuildInstanceHookEnvByObject(self, self.instance)
4598 env["TIMEOUT"] = self.op.timeout
4599 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4602 def CheckPrereq(self):
4603 """Check prerequisites.
4605 This checks that the instance is in the cluster.
4608 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4609 assert self.instance is not None, \
4610 "Cannot retrieve locked instance %s" % self.op.instance_name
4611 _CheckNodeOnline(self, self.instance.primary_node)
4613 def Exec(self, feedback_fn):
4614 """Shutdown the instance.
4617 instance = self.instance
4618 node_current = instance.primary_node
4619 timeout = self.op.timeout
4620 self.cfg.MarkInstanceDown(instance.name)
4621 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4622 msg = result.fail_msg
4624 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4626 _ShutdownInstanceDisks(self, instance)
4629 class LUReinstallInstance(LogicalUnit):
4630 """Reinstall an instance.
4633 HPATH = "instance-reinstall"
4634 HTYPE = constants.HTYPE_INSTANCE
4637 ("os_type", None, _TMaybeString),
4638 ("force_variant", False, _TBool),
4642 def ExpandNames(self):
4643 self._ExpandAndLockInstance()
4645 def BuildHooksEnv(self):
4648 This runs on master, primary and secondary nodes of the instance.
4651 env = _BuildInstanceHookEnvByObject(self, self.instance)
4652 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4655 def CheckPrereq(self):
4656 """Check prerequisites.
4658 This checks that the instance is in the cluster and is not running.
4661 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4662 assert instance is not None, \
4663 "Cannot retrieve locked instance %s" % self.op.instance_name
4664 _CheckNodeOnline(self, instance.primary_node)
4666 if instance.disk_template == constants.DT_DISKLESS:
4667 raise errors.OpPrereqError("Instance '%s' has no disks" %
4668 self.op.instance_name,
4670 _CheckInstanceDown(self, instance, "cannot reinstall")
4672 if self.op.os_type is not None:
4674 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4675 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4677 self.instance = instance
4679 def Exec(self, feedback_fn):
4680 """Reinstall the instance.
4683 inst = self.instance
4685 if self.op.os_type is not None:
4686 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4687 inst.os = self.op.os_type
4688 self.cfg.Update(inst, feedback_fn)
4690 _StartInstanceDisks(self, inst, None)
4692 feedback_fn("Running the instance OS create scripts...")
4693 # FIXME: pass debug option from opcode to backend
4694 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4695 self.op.debug_level)
4696 result.Raise("Could not install OS for instance %s on node %s" %
4697 (inst.name, inst.primary_node))
4699 _ShutdownInstanceDisks(self, inst)
4702 class LURecreateInstanceDisks(LogicalUnit):
4703 """Recreate an instance's missing disks.
4706 HPATH = "instance-recreate-disks"
4707 HTYPE = constants.HTYPE_INSTANCE
4710 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
4714 def ExpandNames(self):
4715 self._ExpandAndLockInstance()
4717 def BuildHooksEnv(self):
4720 This runs on master, primary and secondary nodes of the instance.
4723 env = _BuildInstanceHookEnvByObject(self, self.instance)
4724 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4727 def CheckPrereq(self):
4728 """Check prerequisites.
4730 This checks that the instance is in the cluster and is not running.
4733 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4734 assert instance is not None, \
4735 "Cannot retrieve locked instance %s" % self.op.instance_name
4736 _CheckNodeOnline(self, instance.primary_node)
4738 if instance.disk_template == constants.DT_DISKLESS:
4739 raise errors.OpPrereqError("Instance '%s' has no disks" %
4740 self.op.instance_name, errors.ECODE_INVAL)
4741 _CheckInstanceDown(self, instance, "cannot recreate disks")
4743 if not self.op.disks:
4744 self.op.disks = range(len(instance.disks))
4746 for idx in self.op.disks:
4747 if idx >= len(instance.disks):
4748 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4751 self.instance = instance
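# Illustrative opcode usage (a sketch): disks=[1] would recreate only the
# instance's second disk, while the default empty list (normalized to
# range(len(instance.disks)) above) recreates all of them.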
4753 def Exec(self, feedback_fn):
4754 """Recreate the disks.
4758 for idx, _ in enumerate(self.instance.disks):
4759 if idx not in self.op.disks: # disk idx has not been passed in
4763 _CreateDisks(self, self.instance, to_skip=to_skip)
4766 class LURenameInstance(LogicalUnit):
4767 """Rename an instance.
4770 HPATH = "instance-rename"
4771 HTYPE = constants.HTYPE_INSTANCE
4774 ("new_name", _NoDefault, _TNonEmptyString),
4775 ("ignore_ip", False, _TBool),
4776 ("check_name", True, _TBool),
4779 def BuildHooksEnv(self):
4782 This runs on master, primary and secondary nodes of the instance.
4785 env = _BuildInstanceHookEnvByObject(self, self.instance)
4786 env["INSTANCE_NEW_NAME"] = self.op.new_name
4787 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4790 def CheckPrereq(self):
4791 """Check prerequisites.
4793 This checks that the instance is in the cluster and is not running.
4796 self.op.instance_name = _ExpandInstanceName(self.cfg,
4797 self.op.instance_name)
4798 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4799 assert instance is not None
4800 _CheckNodeOnline(self, instance.primary_node)
4801 _CheckInstanceDown(self, instance, "cannot rename")
4802 self.instance = instance
4804 # new name verification
4805 if self.op.check_name:
4806 name_info = utils.GetHostInfo(self.op.new_name)
4807 self.op.new_name = name_info.name
4809 new_name = self.op.new_name
4811 instance_list = self.cfg.GetInstanceList()
4812 if new_name in instance_list:
4813 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4814 new_name, errors.ECODE_EXISTS)
4816 if not self.op.ignore_ip:
4817 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4818 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4819 (name_info.ip, new_name),
4820 errors.ECODE_NOTUNIQUE)
4822 def Exec(self, feedback_fn):
4823 """Rename the instance.
4826 inst = self.instance
4827 old_name = inst.name
4829 if inst.disk_template == constants.DT_FILE:
4830 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4832 self.cfg.RenameInstance(inst.name, self.op.new_name)
4833 # Change the instance lock. This is definitely safe while we hold the BGL
4834 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4835 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4837 # re-read the instance from the configuration after rename
4838 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4840 if inst.disk_template == constants.DT_FILE:
4841 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4842 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4843 old_file_storage_dir,
4844 new_file_storage_dir)
4845 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4846 " (but the instance has been renamed in Ganeti)" %
4847 (inst.primary_node, old_file_storage_dir,
4848 new_file_storage_dir))
4850 _StartInstanceDisks(self, inst, None)
4852 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4853 old_name, self.op.debug_level)
4854 msg = result.fail_msg
4856 msg = ("Could not run OS rename script for instance %s on node %s"
4857 " (but the instance has been renamed in Ganeti): %s" %
4858 (inst.name, inst.primary_node, msg))
4859 self.proc.LogWarning(msg)
4861 _ShutdownInstanceDisks(self, inst)
4864 class LURemoveInstance(LogicalUnit):
4865 """Remove an instance.
4868 HPATH = "instance-remove"
4869 HTYPE = constants.HTYPE_INSTANCE
4872 ("ignore_failures", False, _TBool),
4877 def ExpandNames(self):
4878 self._ExpandAndLockInstance()
4879 self.needed_locks[locking.LEVEL_NODE] = []
4880 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4882 def DeclareLocks(self, level):
4883 if level == locking.LEVEL_NODE:
4884 self._LockInstancesNodes()
4886 def BuildHooksEnv(self):
4889 This runs on master, primary and secondary nodes of the instance.
4892 env = _BuildInstanceHookEnvByObject(self, self.instance)
4893 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4894 nl = [self.cfg.GetMasterNode()]
4895 nl_post = list(self.instance.all_nodes) + nl
4896 return env, nl, nl_post
4898 def CheckPrereq(self):
4899 """Check prerequisites.
4901 This checks that the instance is in the cluster.
4904 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4905 assert self.instance is not None, \
4906 "Cannot retrieve locked instance %s" % self.op.instance_name
4908 def Exec(self, feedback_fn):
4909 """Remove the instance.
4912 instance = self.instance
4913 logging.info("Shutting down instance %s on node %s",
4914 instance.name, instance.primary_node)
4916 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4917 self.op.shutdown_timeout)
4918 msg = result.fail_msg
4920 if self.op.ignore_failures:
4921 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4923 raise errors.OpExecError("Could not shutdown instance %s on"
4925 (instance.name, instance.primary_node, msg))
4927 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4930 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4931 """Utility function to remove an instance.
4934 logging.info("Removing block devices for instance %s", instance.name)
4936 if not _RemoveDisks(lu, instance):
4937 if not ignore_failures:
4938 raise errors.OpExecError("Can't remove instance's disks")
4939 feedback_fn("Warning: can't remove instance's disks")
4941 logging.info("Removing instance %s out of cluster config", instance.name)
4943 lu.cfg.RemoveInstance(instance.name)
4945 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4946 "Instance lock removal conflict"
4948 # Remove lock for the instance
4949 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4952 class LUQueryInstances(NoHooksLU):
4953 """Logical unit for querying instances.
4956 # pylint: disable-msg=W0142
4958 ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)),
4959 ("names", _EmptyList, _TListOf(_TNonEmptyString)),
4960 ("use_locking", False, _TBool),
4963 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4964 "serial_no", "ctime", "mtime", "uuid"]
4965 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4967 "disk_template", "ip", "mac", "bridge",
4968 "nic_mode", "nic_link",
4969 "sda_size", "sdb_size", "vcpus", "tags",
4970 "network_port", "beparams",
4971 r"(disk)\.(size)/([0-9]+)",
4972 r"(disk)\.(sizes)", "disk_usage",
4973 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4974 r"(nic)\.(bridge)/([0-9]+)",
4975 r"(nic)\.(macs|ips|modes|links|bridges)",
4976 r"(disk|nic)\.(count)",
4978 ] + _SIMPLE_FIELDS +
4980 for name in constants.HVS_PARAMETERS
4981 if name not in constants.HVC_GLOBALS] +
4983 for name in constants.BES_PARAMETERS])
4984 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
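# Illustrative output_fields (a sketch): simple fields such as "name" or
# "oper_ram", plus indexed fields matched by the patterns above, e.g.
# "disk.size/0" (size of the first disk) or "nic.mac/1" (MAC of the
# second NIC).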
4987 def CheckArguments(self):
4988 _CheckOutputFields(static=self._FIELDS_STATIC,
4989 dynamic=self._FIELDS_DYNAMIC,
4990 selected=self.op.output_fields)
4992 def ExpandNames(self):
4993 self.needed_locks = {}
4994 self.share_locks[locking.LEVEL_INSTANCE] = 1
4995 self.share_locks[locking.LEVEL_NODE] = 1
4998 self.wanted = _GetWantedInstances(self, self.op.names)
5000 self.wanted = locking.ALL_SET
5002 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5003 self.do_locking = self.do_node_query and self.op.use_locking
5005 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5006 self.needed_locks[locking.LEVEL_NODE] = []
5007 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5009 def DeclareLocks(self, level):
5010 if level == locking.LEVEL_NODE and self.do_locking:
5011 self._LockInstancesNodes()
5013 def Exec(self, feedback_fn):
5014 """Computes the list of instances and their attributes.
5017 # pylint: disable-msg=R0912
5018 # way too many branches here
5019 all_info = self.cfg.GetAllInstancesInfo()
5020 if self.wanted == locking.ALL_SET:
5021 # caller didn't specify instance names, so ordering is not important
5023 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5025 instance_names = all_info.keys()
5026 instance_names = utils.NiceSort(instance_names)
5028 # caller did specify names, so we must keep the ordering
5030 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5032 tgt_set = all_info.keys()
5033 missing = set(self.wanted).difference(tgt_set)
5035 raise errors.OpExecError("Some instances were removed before"
5036 " retrieving their data: %s" % missing)
5037 instance_names = self.wanted
5039 instance_list = [all_info[iname] for iname in instance_names]
5041 # begin data gathering
5043 nodes = frozenset([inst.primary_node for inst in instance_list])
5044 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5048 if self.do_node_query:
5050 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5052 result = node_data[name]
5054 # offline nodes will be in both lists
5055 off_nodes.append(name)
5057 bad_nodes.append(name)
5060 live_data.update(result.payload)
5061 # else no instance is alive
5063 live_data = dict([(name, {}) for name in instance_names])
5065 # end data gathering
5070 cluster = self.cfg.GetClusterInfo()
5071 for instance in instance_list:
5073 i_hv = cluster.FillHV(instance, skip_globals=True)
5074 i_be = cluster.FillBE(instance)
5075 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5076 for field in self.op.output_fields:
5077 st_match = self._FIELDS_STATIC.Matches(field)
5078 if field in self._SIMPLE_FIELDS:
5079 val = getattr(instance, field)
5080 elif field == "pnode":
5081 val = instance.primary_node
5082 elif field == "snodes":
5083 val = list(instance.secondary_nodes)
5084 elif field == "admin_state":
5085 val = instance.admin_up
5086 elif field == "oper_state":
5087 if instance.primary_node in bad_nodes:
5090 val = bool(live_data.get(instance.name))
5091 elif field == "status":
5092 if instance.primary_node in off_nodes:
5093 val = "ERROR_nodeoffline"
5094 elif instance.primary_node in bad_nodes:
5095 val = "ERROR_nodedown"
5097 running = bool(live_data.get(instance.name))
5099 if instance.admin_up:
5104 if instance.admin_up:
5108 elif field == "oper_ram":
5109 if instance.primary_node in bad_nodes:
5111 elif instance.name in live_data:
5112 val = live_data[instance.name].get("memory", "?")
5115 elif field == "vcpus":
5116 val = i_be[constants.BE_VCPUS]
5117 elif field == "disk_template":
5118 val = instance.disk_template
5121 val = instance.nics[0].ip
5124 elif field == "nic_mode":
5126 val = i_nicp[0][constants.NIC_MODE]
5129 elif field == "nic_link":
5131 val = i_nicp[0][constants.NIC_LINK]
5134 elif field == "bridge":
5135 if (instance.nics and
5136 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5137 val = i_nicp[0][constants.NIC_LINK]
5140 elif field == "mac":
5142 val = instance.nics[0].mac
5145 elif field == "sda_size" or field == "sdb_size":
5146 idx = ord(field[2]) - ord('a')
5148 val = instance.FindDisk(idx).size
5149 except errors.OpPrereqError:
5151 elif field == "disk_usage": # total disk usage per node
5152 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5153 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5154 elif field == "tags":
5155 val = list(instance.GetTags())
5156 elif field == "hvparams":
5158 elif (field.startswith(HVPREFIX) and
5159 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5160 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5161 val = i_hv.get(field[len(HVPREFIX):], None)
5162 elif field == "beparams":
5164 elif (field.startswith(BEPREFIX) and
5165 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5166 val = i_be.get(field[len(BEPREFIX):], None)
5167 elif st_match and st_match.groups():
5168 # matches a variable list
5169 st_groups = st_match.groups()
5170 if st_groups and st_groups[0] == "disk":
5171 if st_groups[1] == "count":
5172 val = len(instance.disks)
5173 elif st_groups[1] == "sizes":
5174 val = [disk.size for disk in instance.disks]
5175 elif st_groups[1] == "size":
5177 val = instance.FindDisk(st_groups[2]).size
5178 except errors.OpPrereqError:
5181 assert False, "Unhandled disk parameter"
5182 elif st_groups[0] == "nic":
5183 if st_groups[1] == "count":
5184 val = len(instance.nics)
5185 elif st_groups[1] == "macs":
5186 val = [nic.mac for nic in instance.nics]
5187 elif st_groups[1] == "ips":
5188 val = [nic.ip for nic in instance.nics]
5189 elif st_groups[1] == "modes":
5190 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5191 elif st_groups[1] == "links":
5192 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5193 elif st_groups[1] == "bridges":
5196 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5197 val.append(nicp[constants.NIC_LINK])
5202 nic_idx = int(st_groups[2])
5203 if nic_idx >= len(instance.nics):
5206 if st_groups[1] == "mac":
5207 val = instance.nics[nic_idx].mac
5208 elif st_groups[1] == "ip":
5209 val = instance.nics[nic_idx].ip
5210 elif st_groups[1] == "mode":
5211 val = i_nicp[nic_idx][constants.NIC_MODE]
5212 elif st_groups[1] == "link":
5213 val = i_nicp[nic_idx][constants.NIC_LINK]
5214 elif st_groups[1] == "bridge":
5215 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5216 if nic_mode == constants.NIC_MODE_BRIDGED:
5217 val = i_nicp[nic_idx][constants.NIC_LINK]
5221 assert False, "Unhandled NIC parameter"
5223 assert False, ("Declared but unhandled variable parameter '%s'" %
5226 assert False, "Declared but unhandled parameter '%s'" % field
5233 class LUFailoverInstance(LogicalUnit):
5234 """Failover an instance.
5237 HPATH = "instance-failover"
5238 HTYPE = constants.HTYPE_INSTANCE
5241 ("ignore_consistency", False, _TBool),
5246 def ExpandNames(self):
5247 self._ExpandAndLockInstance()
5248 self.needed_locks[locking.LEVEL_NODE] = []
5249 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5251 def DeclareLocks(self, level):
5252 if level == locking.LEVEL_NODE:
5253 self._LockInstancesNodes()
5255 def BuildHooksEnv(self):
5258 This runs on master, primary and secondary nodes of the instance.
5261 instance = self.instance
5262 source_node = instance.primary_node
5263 target_node = instance.secondary_nodes[0]
5265 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5266 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5267 "OLD_PRIMARY": source_node,
5268 "OLD_SECONDARY": target_node,
5269 "NEW_PRIMARY": target_node,
5270 "NEW_SECONDARY": source_node,
5272 env.update(_BuildInstanceHookEnvByObject(self, instance))
5273 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5275 nl_post.append(source_node)
5276 return env, nl, nl_post
5278 def CheckPrereq(self):
5279 """Check prerequisites.
5281 This checks that the instance is in the cluster.
5284 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5285 assert self.instance is not None, \
5286 "Cannot retrieve locked instance %s" % self.op.instance_name
5288 bep = self.cfg.GetClusterInfo().FillBE(instance)
5289 if instance.disk_template not in constants.DTS_NET_MIRROR:
5290 raise errors.OpPrereqError("Instance's disk layout is not"
5291 " network mirrored, cannot failover.",
5294 secondary_nodes = instance.secondary_nodes
5295 if not secondary_nodes:
5296 raise errors.ProgrammerError("no secondary node but using "
5297 "a mirrored disk template")
5299 target_node = secondary_nodes[0]
5300 _CheckNodeOnline(self, target_node)
5301 _CheckNodeNotDrained(self, target_node)
5302 if instance.admin_up:
5303 # check memory requirements on the secondary node
5304 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5305 instance.name, bep[constants.BE_MEMORY],
5306 instance.hypervisor)
5308 self.LogInfo("Not checking memory on the secondary node as"
5309 " instance will not be started")
5311 # check bridge existence
5312 _CheckInstanceBridgesExist(self, instance, node=target_node)
5314 def Exec(self, feedback_fn):
5315 """Failover an instance.
5317 The failover is done by shutting it down on its present node and
5318 starting it on the secondary.
5321 instance = self.instance
5323 source_node = instance.primary_node
5324 target_node = instance.secondary_nodes[0]
5326 if instance.admin_up:
5327 feedback_fn("* checking disk consistency between source and target")
5328 for dev in instance.disks:
5329 # for drbd, these are drbd over lvm
5330 if not _CheckDiskConsistency(self, dev, target_node, False):
5331 if not self.op.ignore_consistency:
5332 raise errors.OpExecError("Disk %s is degraded on target node,"
5333 " aborting failover." % dev.iv_name)
5335 feedback_fn("* not checking disk consistency as instance is not running")
5337 feedback_fn("* shutting down instance on source node")
5338 logging.info("Shutting down instance %s on node %s",
5339 instance.name, source_node)
5341 result = self.rpc.call_instance_shutdown(source_node, instance,
5342 self.op.shutdown_timeout)
5343 msg = result.fail_msg
5345 if self.op.ignore_consistency:
5346 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5347 " Proceeding anyway. Please make sure node"
5348 " %s is down. Error details: %s",
5349 instance.name, source_node, source_node, msg)
5351 raise errors.OpExecError("Could not shutdown instance %s on"
5353 (instance.name, source_node, msg))
5355 feedback_fn("* deactivating the instance's disks on source node")
5356 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5357 raise errors.OpExecError("Can't shut down the instance's disks.")
5359 instance.primary_node = target_node
5360 # distribute new instance config to the other nodes
5361 self.cfg.Update(instance, feedback_fn)
5363 # Only start the instance if it's marked as up
5364 if instance.admin_up:
5365 feedback_fn("* activating the instance's disks on target node")
5366 logging.info("Starting instance %s on node %s",
5367 instance.name, target_node)
5369 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5370 ignore_secondaries=True)
5372 _ShutdownInstanceDisks(self, instance)
5373 raise errors.OpExecError("Can't activate the instance's disks")
5375 feedback_fn("* starting the instance on the target node")
5376 result = self.rpc.call_instance_start(target_node, instance, None, None)
5377 msg = result.fail_msg
5379 _ShutdownInstanceDisks(self, instance)
5380 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5381 (instance.name, target_node, msg))
5384 class LUMigrateInstance(LogicalUnit):
5385 """Migrate an instance.
5387 This is migration without shutting down, compared to the failover,
5388 which is done with shutdown.
5391 HPATH = "instance-migrate"
5392 HTYPE = constants.HTYPE_INSTANCE
5395 ("live", True, _TBool),
5396 ("cleanup", False, _TBool),
5401 def ExpandNames(self):
5402 self._ExpandAndLockInstance()
5404 self.needed_locks[locking.LEVEL_NODE] = []
5405 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5407 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5408 self.op.live, self.op.cleanup)
5409 self.tasklets = [self._migrater]
5411 def DeclareLocks(self, level):
5412 if level == locking.LEVEL_NODE:
5413 self._LockInstancesNodes()
5415 def BuildHooksEnv(self):
5418 This runs on master, primary and secondary nodes of the instance.
5421 instance = self._migrater.instance
5422 source_node = instance.primary_node
5423 target_node = instance.secondary_nodes[0]
5424 env = _BuildInstanceHookEnvByObject(self, instance)
5425 env["MIGRATE_LIVE"] = self.op.live
5426 env["MIGRATE_CLEANUP"] = self.op.cleanup
5428 "OLD_PRIMARY": source_node,
5429 "OLD_SECONDARY": target_node,
5430 "NEW_PRIMARY": target_node,
5431 "NEW_SECONDARY": source_node,
5433 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5435 nl_post.append(source_node)
5436 return env, nl, nl_post
5439 class LUMoveInstance(LogicalUnit):
5440 """Move an instance by data-copying.
5443 HPATH = "instance-move"
5444 HTYPE = constants.HTYPE_INSTANCE
5447 ("target_node", _NoDefault, _TNonEmptyString),
5452 def ExpandNames(self):
5453 self._ExpandAndLockInstance()
5454 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5455 self.op.target_node = target_node
5456 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5457 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5459 def DeclareLocks(self, level):
5460 if level == locking.LEVEL_NODE:
5461 self._LockInstancesNodes(primary_only=True)
5463 def BuildHooksEnv(self):
5466 This runs on master, primary and secondary nodes of the instance.
5470 "TARGET_NODE": self.op.target_node,
5471 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5473 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5474 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5475 self.op.target_node]
5478 def CheckPrereq(self):
5479 """Check prerequisites.
5481 This checks that the instance is in the cluster.
5484 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5485 assert self.instance is not None, \
5486 "Cannot retrieve locked instance %s" % self.op.instance_name
5488 node = self.cfg.GetNodeInfo(self.op.target_node)
5489 assert node is not None, \
5490 "Cannot retrieve locked node %s" % self.op.target_node
5492 self.target_node = target_node = node.name
5494 if target_node == instance.primary_node:
5495 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5496 (instance.name, target_node),
5499 bep = self.cfg.GetClusterInfo().FillBE(instance)
5501 for idx, dsk in enumerate(instance.disks):
5502 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5503 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5504 " cannot copy" % idx, errors.ECODE_STATE)
5506 _CheckNodeOnline(self, target_node)
5507 _CheckNodeNotDrained(self, target_node)
5509 if instance.admin_up:
5510 # check memory requirements on the secondary node
5511 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5512 instance.name, bep[constants.BE_MEMORY],
5513 instance.hypervisor)
5515 self.LogInfo("Not checking memory on the secondary node as"
5516 " instance will not be started")
5518 # check bridge existence
5519 _CheckInstanceBridgesExist(self, instance, node=target_node)
5521 def Exec(self, feedback_fn):
5522 """Move an instance.
5524 The move is done by shutting it down on its present node, copying
5525 the data over (slow) and starting it on the new node.
5528 instance = self.instance
5530 source_node = instance.primary_node
5531 target_node = self.target_node
5533 self.LogInfo("Shutting down instance %s on source node %s",
5534 instance.name, source_node)
5536 result = self.rpc.call_instance_shutdown(source_node, instance,
5537 self.op.shutdown_timeout)
5538 msg = result.fail_msg
5540 if self.op.ignore_consistency:
5541 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5542 " Proceeding anyway. Please make sure node"
5543 " %s is down. Error details: %s",
5544 instance.name, source_node, source_node, msg)
5546 raise errors.OpExecError("Could not shutdown instance %s on"
5548 (instance.name, source_node, msg))
5550 # create the target disks
5552 _CreateDisks(self, instance, target_node=target_node)
5553 except errors.OpExecError:
5554 self.LogWarning("Device creation failed, reverting...")
5556 _RemoveDisks(self, instance, target_node=target_node)
5558 self.cfg.ReleaseDRBDMinors(instance.name)
5561 cluster_name = self.cfg.GetClusterInfo().cluster_name
5564 # activate, get path, copy the data over
5565 for idx, disk in enumerate(instance.disks):
5566 self.LogInfo("Copying data for disk %d", idx)
5567 result = self.rpc.call_blockdev_assemble(target_node, disk,
5568 instance.name, True)
5570 self.LogWarning("Can't assemble newly created disk %d: %s",
5571 idx, result.fail_msg)
5572 errs.append(result.fail_msg)
5574 dev_path = result.payload
5575 result = self.rpc.call_blockdev_export(source_node, disk,
5576 target_node, dev_path,
5579 self.LogWarning("Can't copy data over for disk %d: %s",
5580 idx, result.fail_msg)
5581 errs.append(result.fail_msg)
5585 self.LogWarning("Some disks failed to copy, aborting")
5587 _RemoveDisks(self, instance, target_node=target_node)
5589 self.cfg.ReleaseDRBDMinors(instance.name)
5590 raise errors.OpExecError("Errors during disk copy: %s" %
5593 instance.primary_node = target_node
5594 self.cfg.Update(instance, feedback_fn)
5596 self.LogInfo("Removing the disks on the original node")
5597 _RemoveDisks(self, instance, target_node=source_node)
5599 # Only start the instance if it's marked as up
5600 if instance.admin_up:
5601 self.LogInfo("Starting instance %s on node %s",
5602 instance.name, target_node)
5604 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5605 ignore_secondaries=True)
5607 _ShutdownInstanceDisks(self, instance)
5608 raise errors.OpExecError("Can't activate the instance's disks")
5610 result = self.rpc.call_instance_start(target_node, instance, None, None)
5611 msg = result.fail_msg
5613 _ShutdownInstanceDisks(self, instance)
5614 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5615 (instance.name, target_node, msg))
5618 class LUMigrateNode(LogicalUnit):
5619 """Migrate all instances from a node.
5622 HPATH = "node-migrate"
5623 HTYPE = constants.HTYPE_NODE
5626 ("live", False, _TBool),
5630 def ExpandNames(self):
5631 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5633 self.needed_locks = {
5634 locking.LEVEL_NODE: [self.op.node_name],
5637 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5639 # Create tasklets for migrating instances for all instances on this node
5643 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5644 logging.debug("Migrating instance %s", inst.name)
5645 names.append(inst.name)
5647 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5649 self.tasklets = tasklets
5651 # Declare instance locks
5652 self.needed_locks[locking.LEVEL_INSTANCE] = names
5654 def DeclareLocks(self, level):
5655 if level == locking.LEVEL_NODE:
5656 self._LockInstancesNodes()
5658 def BuildHooksEnv(self):
5661 This runs on the master, the primary and all the secondaries.
5665 "NODE_NAME": self.op.node_name,
5668 nl = [self.cfg.GetMasterNode()]
5670 return (env, nl, nl)
5673 class TLMigrateInstance(Tasklet):
5674 def __init__(self, lu, instance_name, live, cleanup):
5675 """Initializes this class.
5678 Tasklet.__init__(self, lu)
5681 self.instance_name = instance_name
5682 self.live = live
5683 self.cleanup = cleanup
5685 def CheckPrereq(self):
5686 """Check prerequisites.
5688 This checks that the instance is in the cluster.
5691 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5692 instance = self.cfg.GetInstanceInfo(instance_name)
5693 assert instance is not None
5695 if instance.disk_template != constants.DT_DRBD8:
5696 raise errors.OpPrereqError("Instance's disk layout is not"
5697 " drbd8, cannot migrate.", errors.ECODE_STATE)
5699 secondary_nodes = instance.secondary_nodes
5700 if not secondary_nodes:
5701 raise errors.ConfigurationError("No secondary node but using"
5702 " drbd8 disk template")
5704 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5706 target_node = secondary_nodes[0]
5707 # check memory requirements on the secondary node
5708 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5709 instance.name, i_be[constants.BE_MEMORY],
5710 instance.hypervisor)
5712 # check bridge existence
5713 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5715 if not self.cleanup:
5716 _CheckNodeNotDrained(self.lu, target_node)
5717 result = self.rpc.call_instance_migratable(instance.primary_node,
5719 result.Raise("Can't migrate, please use failover",
5720 prereq=True, ecode=errors.ECODE_STATE)
5722 self.instance = instance
5724 def _WaitUntilSync(self):
5725 """Poll with custom rpc for disk sync.
5727 This uses our own step-based rpc call.
5730 self.feedback_fn("* wait until resync is done")
5734 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5735 self.nodes_ip,
5736 self.instance.disks)
5738 for node, nres in result.items():
5739 nres.Raise("Cannot resync disks on node %s" % node)
5740 node_done, node_percent = nres.payload
5741 all_done = all_done and node_done
5742 if node_percent is not None:
5743 min_percent = min(min_percent, node_percent)
5745 if min_percent < 100:
5746 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5749 def _EnsureSecondary(self, node):
5750 """Demote a node to secondary.
5753 self.feedback_fn("* switching node %s to secondary mode" % node)
5755 for dev in self.instance.disks:
5756 self.cfg.SetDiskID(dev, node)
5758 result = self.rpc.call_blockdev_close(node, self.instance.name,
5759 self.instance.disks)
5760 result.Raise("Cannot change disk to secondary on node %s" % node)
5762 def _GoStandalone(self):
5763 """Disconnect from the network.
5766 self.feedback_fn("* changing into standalone mode")
5767 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5768 self.instance.disks)
5769 for node, nres in result.items():
5770 nres.Raise("Cannot disconnect disks node %s" % node)
5772 def _GoReconnect(self, multimaster):
5773 """Reconnect to the network.
5779 msg = "single-master"
5780 self.feedback_fn("* changing disks into %s mode" % msg)
5781 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5782 self.instance.disks,
5783 self.instance.name, multimaster)
5784 for node, nres in result.items():
5785 nres.Raise("Cannot change disks config on node %s" % node)
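# Illustrative sequence (a sketch mirroring _ExecMigration below) of how
# the helpers above are combined for a live migration:
#
#   self._EnsureSecondary(target_node)  # demote the target's disks
#   self._GoStandalone()                # drop the DRBD network configuration
#   self._GoReconnect(True)             # reconnect in multi-master mode
#   self._WaitUntilSync()               # wait for resync before migrating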
5787 def _ExecCleanup(self):
5788 """Try to cleanup after a failed migration.
5790 The cleanup is done by:
5791 - check that the instance is running only on one node
5792 (and update the config if needed)
5793 - change disks on its secondary node to secondary
5794 - wait until disks are fully synchronized
5795 - disconnect from the network
5796 - change disks into single-master mode
5797 - wait again until disks are fully synchronized
5800 instance = self.instance
5801 target_node = self.target_node
5802 source_node = self.source_node
5804 # check running on only one node
5805 self.feedback_fn("* checking where the instance actually runs"
5806 " (if this hangs, the hypervisor might be in"
5807 " a bad state)")
5808 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5809 for node, result in ins_l.items():
5810 result.Raise("Can't contact node %s" % node)
5812 runningon_source = instance.name in ins_l[source_node].payload
5813 runningon_target = instance.name in ins_l[target_node].payload
5815 if runningon_source and runningon_target:
5816 raise errors.OpExecError("Instance seems to be running on two nodes,"
5817 " or the hypervisor is confused. You will have"
5818 " to ensure manually that it runs only on one"
5819 " and restart this operation.")
5821 if not (runningon_source or runningon_target):
5822 raise errors.OpExecError("Instance does not seem to be running at all."
5823 " In this case, it's safer to repair by"
5824 " running 'gnt-instance stop' to ensure disk"
5825 " shutdown, and then restarting it.")
5827 if runningon_target:
5828 # the migration has actually succeeded, we need to update the config
5829 self.feedback_fn("* instance running on secondary node (%s),"
5830 " updating config" % target_node)
5831 instance.primary_node = target_node
5832 self.cfg.Update(instance, self.feedback_fn)
5833 demoted_node = source_node
5835 self.feedback_fn("* instance confirmed to be running on its"
5836 " primary node (%s)" % source_node)
5837 demoted_node = target_node
5839 self._EnsureSecondary(demoted_node)
5841 self._WaitUntilSync()
5842 except errors.OpExecError:
5843 # we ignore errors here, since if the device is standalone, it
5844 # won't be able to sync
5846 self._GoStandalone()
5847 self._GoReconnect(False)
5848 self._WaitUntilSync()
5850 self.feedback_fn("* done")
5852 def _RevertDiskStatus(self):
5853 """Try to revert the disk status after a failed migration.
5856 target_node = self.target_node
5858 self._EnsureSecondary(target_node)
5859 self._GoStandalone()
5860 self._GoReconnect(False)
5861 self._WaitUntilSync()
5862 except errors.OpExecError, err:
5863 self.lu.LogWarning("Migration failed and I can't reconnect the"
5864 " drives: error '%s'\n"
5865 "Please look and recover the instance status" %
5868 def _AbortMigration(self):
5869 """Call the hypervisor code to abort a started migration.
5872 instance = self.instance
5873 target_node = self.target_node
5874 migration_info = self.migration_info
5876 abort_result = self.rpc.call_finalize_migration(target_node,
5880 abort_msg = abort_result.fail_msg
5882 logging.error("Aborting migration failed on target node %s: %s",
5883 target_node, abort_msg)
5884 # Don't raise an exception here, as we still have to try to revert the
5885 # disk status, even if this step failed.
5887 def _ExecMigration(self):
5888 """Migrate an instance.
5890 The migration is done by:
5891 - change the disks into dual-master mode
5892 - wait until disks are fully synchronized again
5893 - migrate the instance
5894 - change disks on the new secondary node (the old primary) to secondary
5895 - wait until disks are fully synchronized
5896 - change disks into single-master mode
5899 instance = self.instance
5900 target_node = self.target_node
5901 source_node = self.source_node
5903 self.feedback_fn("* checking disk consistency between source and target")
5904 for dev in instance.disks:
5905 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5906 raise errors.OpExecError("Disk %s is degraded or not fully"
5907 " synchronized on target node,"
5908 " aborting migrate." % dev.iv_name)
5910 # First get the migration information from the remote node
5911 result = self.rpc.call_migration_info(source_node, instance)
5912 msg = result.fail_msg
5914 log_err = ("Failed fetching source migration information from %s: %s" %
5916 logging.error(log_err)
5917 raise errors.OpExecError(log_err)
5919 self.migration_info = migration_info = result.payload
5921 # Then switch the disks to master/master mode
5922 self._EnsureSecondary(target_node)
5923 self._GoStandalone()
5924 self._GoReconnect(True)
5925 self._WaitUntilSync()
5927 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5928 result = self.rpc.call_accept_instance(target_node,
5931 self.nodes_ip[target_node])
5933 msg = result.fail_msg
5935 logging.error("Instance pre-migration failed, trying to revert"
5936 " disk status: %s", msg)
5937 self.feedback_fn("Pre-migration failed, aborting")
5938 self._AbortMigration()
5939 self._RevertDiskStatus()
5940 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5941 (instance.name, msg))
5943 self.feedback_fn("* migrating instance to %s" % target_node)
5945 result = self.rpc.call_instance_migrate(source_node, instance,
5946 self.nodes_ip[target_node],
5948 msg = result.fail_msg
5950 logging.error("Instance migration failed, trying to revert"
5951 " disk status: %s", msg)
5952 self.feedback_fn("Migration failed, aborting")
5953 self._AbortMigration()
5954 self._RevertDiskStatus()
5955 raise errors.OpExecError("Could not migrate instance %s: %s" %
5956 (instance.name, msg))
5959 instance.primary_node = target_node
5960 # distribute new instance config to the other nodes
5961 self.cfg.Update(instance, self.feedback_fn)
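# Added note (not in the original source): the finalize_migration call below
# notifies the target node that the migration has completed, so that its
# hypervisor can finalize (or clean up) the incoming instance.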
5963 result = self.rpc.call_finalize_migration(target_node,
5967 msg = result.fail_msg
5969 logging.error("Instance migration succeeded, but finalization failed:"
5971 raise errors.OpExecError("Could not finalize instance migration: %s" %
5974 self._EnsureSecondary(source_node)
5975 self._WaitUntilSync()
5976 self._GoStandalone()
5977 self._GoReconnect(False)
5978 self._WaitUntilSync()
5980 self.feedback_fn("* done")
5982 def Exec(self, feedback_fn):
5983 """Perform the migration.
5986 feedback_fn("Migrating instance %s" % self.instance.name)
5988 self.feedback_fn = feedback_fn
5990 self.source_node = self.instance.primary_node
5991 self.target_node = self.instance.secondary_nodes[0]
5992 self.all_nodes = [self.source_node, self.target_node]
5994 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5995 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5999 return self._ExecCleanup()
6001 return self._ExecMigration()
6004 def _CreateBlockDev(lu, node, instance, device, force_create,
6006 """Create a tree of block devices on a given node.
6008 If this device type has to be created on secondaries, create it and all its children.
6011 If not, just recurse to children keeping the same 'force' value.
6013 @param lu: the lu on whose behalf we execute
6014 @param node: the node on which to create the device
6015 @type instance: L{objects.Instance}
6016 @param instance: the instance which owns the device
6017 @type device: L{objects.Disk}
6018 @param device: the device to create
6019 @type force_create: boolean
6020 @param force_create: whether to force creation of this device; this
6021 will be changed to True whenever we find a device which has the
6022 CreateOnSecondary() attribute
6023 @param info: the extra 'metadata' we should attach to the device
6024 (this will be represented as a LVM tag)
6025 @type force_open: boolean
6026 @param force_open: this parameter will be passed to the
6027 L{backend.BlockdevCreate} function where it specifies
6028 whether we run on primary or not, and it affects both
6029 the child assembly and the device's own Open() execution
6032 if device.CreateOnSecondary():
6036 for child in device.children:
6037 _CreateBlockDev(lu, node, instance, child, force_create,
6040 if not force_create:
6043 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6046 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6047 """Create a single block device on a given node.
6049 This will not recurse over children of the device, so they must be
6052 @param lu: the lu on whose behalf we execute
6053 @param node: the node on which to create the device
6054 @type instance: L{objects.Instance}
6055 @param instance: the instance which owns the device
6056 @type device: L{objects.Disk}
6057 @param device: the device to create
6058 @param info: the extra 'metadata' we should attach to the device
6059 (this will be represented as a LVM tag)
6060 @type force_open: boolean
6061 @param force_open: this parameter will be passed to the
6062 L{backend.BlockdevCreate} function where it specifies
6063 whether we run on primary or not, and it affects both
6064 the child assembly and the device's own Open() execution
6067 lu.cfg.SetDiskID(device, node)
6068 result = lu.rpc.call_blockdev_create(node, device, device.size,
6069 instance.name, force_open, info)
6070 result.Raise("Can't create block device %s on"
6071 " node %s for instance %s" % (device, node, instance.name))
6072 if device.physical_id is None:
6073 device.physical_id = result.payload
6076 def _GenerateUniqueNames(lu, exts):
6077 """Generate a suitable LV name.
6079 This will generate a logical volume name for the given instance.
6084 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6085 results.append("%s%s" % (new_id, val))
6089 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6091 """Generate a drbd8 device complete with its children.
6094 port = lu.cfg.AllocatePort()
6095 vgname = lu.cfg.GetVGName()
6096 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6097 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6098 logical_id=(vgname, names[0]))
6099 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6100 logical_id=(vgname, names[1]))
6101 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6102 logical_id=(primary, secondary, port,
6105 children=[dev_data, dev_meta],
6110 def _GenerateDiskTemplate(lu, template_name,
6111 instance_name, primary_node,
6112 secondary_nodes, disk_info,
6113 file_storage_dir, file_driver,
6115 """Generate the entire disk layout for a given template type.
6118 #TODO: compute space requirements
6120 vgname = lu.cfg.GetVGName()
6121 disk_count = len(disk_info)
6123 if template_name == constants.DT_DISKLESS:
6125 elif template_name == constants.DT_PLAIN:
6126 if len(secondary_nodes) != 0:
6127 raise errors.ProgrammerError("Wrong template configuration")
6129 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6130 for i in range(disk_count)])
6131 for idx, disk in enumerate(disk_info):
6132 disk_index = idx + base_index
6133 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6134 logical_id=(vgname, names[idx]),
6135 iv_name="disk/%d" % disk_index,
6137 disks.append(disk_dev)
6138 elif template_name == constants.DT_DRBD8:
6139 if len(secondary_nodes) != 1:
6140 raise errors.ProgrammerError("Wrong template configuration")
6141 remote_node = secondary_nodes[0]
6142 minors = lu.cfg.AllocateDRBDMinor(
6143 [primary_node, remote_node] * len(disk_info), instance_name)
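# Added note (not in the original source): the minors list returned above
# alternates primary/secondary entries, one pair per disk, which is why the
# loop below indexes it as minors[idx*2] and minors[idx*2+1].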
6146 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6147 for i in range(disk_count)]):
6148 names.append(lv_prefix + "_data")
6149 names.append(lv_prefix + "_meta")
6150 for idx, disk in enumerate(disk_info):
6151 disk_index = idx + base_index
6152 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6153 disk["size"], names[idx*2:idx*2+2],
6154 "disk/%d" % disk_index,
6155 minors[idx*2], minors[idx*2+1])
6156 disk_dev.mode = disk["mode"]
6157 disks.append(disk_dev)
6158 elif template_name == constants.DT_FILE:
6159 if len(secondary_nodes) != 0:
6160 raise errors.ProgrammerError("Wrong template configuration")
6162 _RequireFileStorage()
6164 for idx, disk in enumerate(disk_info):
6165 disk_index = idx + base_index
6166 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6167 iv_name="disk/%d" % disk_index,
6168 logical_id=(file_driver,
6169 "%s/disk%d" % (file_storage_dir,
6172 disks.append(disk_dev)
6174 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6178 def _GetInstanceInfoText(instance):
6179 """Compute that text that should be added to the disk's metadata.
6182 return "originstname+%s" % instance.name
6185 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6186 """Create all disks for an instance.
6188 This abstracts away some work from AddInstance.
6190 @type lu: L{LogicalUnit}
6191 @param lu: the logical unit on whose behalf we execute
6192 @type instance: L{objects.Instance}
6193 @param instance: the instance whose disks we should create
6195 @param to_skip: list of indices to skip
6196 @type target_node: string
6197 @param target_node: if passed, overrides the target node for creation
6199 @return: the success of the creation
6202 info = _GetInstanceInfoText(instance)
6203 if target_node is None:
6204 pnode = instance.primary_node
6205 all_nodes = instance.all_nodes
6210 if instance.disk_template == constants.DT_FILE:
6211 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6212 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6214 result.Raise("Failed to create directory '%s' on"
6215 " node %s" % (file_storage_dir, pnode))
6217 # Note: this needs to be kept in sync with adding of disks in
6218 # LUSetInstanceParams
6219 for idx, device in enumerate(instance.disks):
6220 if to_skip and idx in to_skip:
6222 logging.info("Creating volume %s for instance %s",
6223 device.iv_name, instance.name)
6225 for node in all_nodes:
6226 f_create = node == pnode
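# Added note (not in the original source): f_create is passed below both as
# force_create and as force_open, so creation and opening are only forced on
# the primary node; on the other nodes _CreateBlockDev only creates the
# device types that need to exist on secondaries.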
6227 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6230 def _RemoveDisks(lu, instance, target_node=None):
6231 """Remove all disks for an instance.
6233 This abstracts away some work from `AddInstance()` and
6234 `RemoveInstance()`. Note that in case some of the devices couldn't
6235 be removed, the removal will continue with the other ones (compare
6236 with `_CreateDisks()`).
6238 @type lu: L{LogicalUnit}
6239 @param lu: the logical unit on whose behalf we execute
6240 @type instance: L{objects.Instance}
6241 @param instance: the instance whose disks we should remove
6242 @type target_node: string
6243 @param target_node: used to override the node on which to remove the disks
6245 @return: the success of the removal
6248 logging.info("Removing block devices for instance %s", instance.name)
6251 for device in instance.disks:
6253 edata = [(target_node, device)]
6255 edata = device.ComputeNodeTree(instance.primary_node)
6256 for node, disk in edata:
6257 lu.cfg.SetDiskID(disk, node)
6258 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6260 lu.LogWarning("Could not remove block device %s on node %s,"
6261 " continuing anyway: %s", device.iv_name, node, msg)
6264 if instance.disk_template == constants.DT_FILE:
6265 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6269 tgt = instance.primary_node
6270 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6272 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6273 file_storage_dir, instance.primary_node, result.fail_msg)
6279 def _ComputeDiskSize(disk_template, disks):
6280 """Compute disk size requirements in the volume group
6283 # Required free disk space as a function of the disk template and disk sizes
6285 constants.DT_DISKLESS: None,
6286 constants.DT_PLAIN: sum(d["size"] for d in disks),
6287 # 128 MB are added for drbd metadata for each disk
6288 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6289 constants.DT_FILE: None,
6292 if disk_template not in req_size_dict:
6293 raise errors.ProgrammerError("Disk template '%s' size requirement"
6294 " is unknown" % disk_template)
6296 return req_size_dict[disk_template]
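# Worked example (added, not in the original source): for two DRBD8 disks of
# 10240 MB each the requirement is (10240 + 128) + (10240 + 128) = 20736 MB,
# since 128 MB of DRBD metadata is added per disk; diskless and file-based
# templates need no volume group space (None).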
6299 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6300 """Hypervisor parameter validation.
6302 This function abstracts the hypervisor parameter validation to be
6303 used in both instance create and instance modify.
6305 @type lu: L{LogicalUnit}
6306 @param lu: the logical unit for which we check
6307 @type nodenames: list
6308 @param nodenames: the list of nodes on which we should check
6309 @type hvname: string
6310 @param hvname: the name of the hypervisor we should use
6311 @type hvparams: dict
6312 @param hvparams: the parameters which we need to check
6313 @raise errors.OpPrereqError: if the parameters are not valid
6316 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6319 for node in nodenames:
6323 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6326 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6327 """OS parameters validation.
6329 @type lu: L{LogicalUnit}
6330 @param lu: the logical unit for which we check
6331 @type required: boolean
6332 @param required: whether the validation should fail if the OS is not
6334 @type nodenames: list
6335 @param nodenames: the list of nodes on which we should check
6336 @type osname: string
6337 @param osname: the name of the OS we should use
6338 @type osparams: dict
6339 @param osparams: the parameters which we need to check
6340 @raise errors.OpPrereqError: if the parameters are not valid
6343 result = lu.rpc.call_os_validate(required, nodenames, osname,
6344 [constants.OS_VALIDATE_PARAMETERS],
6346 for node, nres in result.items():
6347 # we don't check for offline cases since this should be run only
6348 # against the master node and/or an instance's nodes
6349 nres.Raise("OS Parameters validation failed on node %s" % node)
6350 if not nres.payload:
6351 lu.LogInfo("OS %s not found on node %s, validation skipped",
6355 class LUCreateInstance(LogicalUnit):
6356 """Create an instance.
6359 HPATH = "instance-add"
6360 HTYPE = constants.HTYPE_INSTANCE
6363 ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)),
6364 ("start", True, _TBool),
6365 ("wait_for_sync", True, _TBool),
6366 ("ip_check", True, _TBool),
6367 ("name_check", True, _TBool),
6368 ("disks", _NoDefault, _TListOf(_TDict)),
6369 ("nics", _NoDefault, _TListOf(_TDict)),
6370 ("hvparams", _EmptyDict, _TDict),
6371 ("beparams", _EmptyDict, _TDict),
6372 ("osparams", _EmptyDict, _TDict),
6373 ("no_install", None, _TMaybeBool),
6374 ("os_type", None, _TMaybeString),
6375 ("force_variant", False, _TBool),
6376 ("source_handshake", None, _TOr(_TList, _TNone)),
6377 ("source_x509_ca", None, _TOr(_TList, _TNone)),
6378 ("source_instance_name", None, _TMaybeString),
6379 ("src_node", None, _TMaybeString),
6380 ("src_path", None, _TMaybeString),
6381 ("pnode", None, _TMaybeString),
6382 ("snode", None, _TMaybeString),
6383 ("iallocator", None, _TMaybeString),
6384 ("hypervisor", None, _TMaybeString),
6385 ("disk_template", _NoDefault, _CheckDiskTemplate),
6386 ("identify_defaults", False, _TBool),
6387 ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))),
6388 ("file_storage_dir", None, _TMaybeString),
6389 ("dry_run", False, _TBool),
6393 def CheckArguments(self):
6397 # do not require name_check to ease forward/backward compatibility
6399 if self.op.no_install and self.op.start:
6400 self.LogInfo("No-installation mode selected, disabling startup")
6401 self.op.start = False
6402 # validate/normalize the instance name
6403 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6404 if self.op.ip_check and not self.op.name_check:
6405 # TODO: make the ip check more flexible and not depend on the name check
6406 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6409 # check nics' parameter names
6410 for nic in self.op.nics:
6411 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6413 # check disks: parameter names and consistent adopt/no-adopt strategy
6414 has_adopt = has_no_adopt = False
6415 for disk in self.op.disks:
6416 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6421 if has_adopt and has_no_adopt:
6422 raise errors.OpPrereqError("Either all disks are adopted or none is",
6425 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6426 raise errors.OpPrereqError("Disk adoption is not supported for the"
6427 " '%s' disk template" %
6428 self.op.disk_template,
6430 if self.op.iallocator is not None:
6431 raise errors.OpPrereqError("Disk adoption not allowed with an"
6432 " iallocator script", errors.ECODE_INVAL)
6433 if self.op.mode == constants.INSTANCE_IMPORT:
6434 raise errors.OpPrereqError("Disk adoption not allowed for"
6435 " instance import", errors.ECODE_INVAL)
6437 self.adopt_disks = has_adopt
6439 # instance name verification
6440 if self.op.name_check:
6441 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6442 self.op.instance_name = self.hostname1.name
6443 # used in CheckPrereq for ip ping check
6444 self.check_ip = self.hostname1.ip
6445 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6446 raise errors.OpPrereqError("Remote imports require names to be checked" %
6449 self.check_ip = None
6451 # file storage checks
6452 if (self.op.file_driver and
6453 not self.op.file_driver in constants.FILE_DRIVER):
6454 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6455 self.op.file_driver, errors.ECODE_INVAL)
6457 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6458 raise errors.OpPrereqError("File storage directory path not absolute",
6461 ### Node/iallocator related checks
6462 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6463 raise errors.OpPrereqError("One and only one of iallocator and primary"
6464 " node must be given",
6467 self._cds = _GetClusterDomainSecret()
6469 if self.op.mode == constants.INSTANCE_IMPORT:
6470 # On import force_variant must be True, because if we forced it at
6471 # initial install, our only chance when importing it back is that it
6473 self.op.force_variant = True
6475 if self.op.no_install:
6476 self.LogInfo("No-installation mode has no effect during import")
6478 elif self.op.mode == constants.INSTANCE_CREATE:
6479 if self.op.os_type is None:
6480 raise errors.OpPrereqError("No guest OS specified",
6482 if self.op.disk_template is None:
6483 raise errors.OpPrereqError("No disk template specified",
6486 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6487 # Check handshake to ensure both clusters have the same domain secret
6488 src_handshake = self.op.source_handshake
6489 if not src_handshake:
6490 raise errors.OpPrereqError("Missing source handshake",
6493 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6496 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6499 # Load and check source CA
6500 self.source_x509_ca_pem = self.op.source_x509_ca
6501 if not self.source_x509_ca_pem:
6502 raise errors.OpPrereqError("Missing source X509 CA",
6506 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6508 except OpenSSL.crypto.Error, err:
6509 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6510 (err, ), errors.ECODE_INVAL)
6512 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6513 if errcode is not None:
6514 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6517 self.source_x509_ca = cert
6519 src_instance_name = self.op.source_instance_name
6520 if not src_instance_name:
6521 raise errors.OpPrereqError("Missing source instance name",
6524 self.source_instance_name = \
6525 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6528 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6529 self.op.mode, errors.ECODE_INVAL)
6531 def ExpandNames(self):
6532 """ExpandNames for CreateInstance.
6534 Figure out the right locks for instance creation.
6537 self.needed_locks = {}
6539 instance_name = self.op.instance_name
6540 # this is just a preventive check, but someone might still add this
6541 # instance in the meantime, and creation will fail at lock-add time
6542 if instance_name in self.cfg.GetInstanceList():
6543 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6544 instance_name, errors.ECODE_EXISTS)
6546 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6548 if self.op.iallocator:
6549 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6551 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6552 nodelist = [self.op.pnode]
6553 if self.op.snode is not None:
6554 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6555 nodelist.append(self.op.snode)
6556 self.needed_locks[locking.LEVEL_NODE] = nodelist
6558 # in case of import lock the source node too
6559 if self.op.mode == constants.INSTANCE_IMPORT:
6560 src_node = self.op.src_node
6561 src_path = self.op.src_path
6563 if src_path is None:
6564 self.op.src_path = src_path = self.op.instance_name
6566 if src_node is None:
6567 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6568 self.op.src_node = None
6569 if os.path.isabs(src_path):
6570 raise errors.OpPrereqError("Importing an instance from an absolute"
6571 " path requires a source node option.",
6574 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6575 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6576 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6577 if not os.path.isabs(src_path):
6578 self.op.src_path = src_path = \
6579 utils.PathJoin(constants.EXPORT_DIR, src_path)
6581 def _RunAllocator(self):
6582 """Run the allocator based on input opcode.
6585 nics = [n.ToDict() for n in self.nics]
6586 ial = IAllocator(self.cfg, self.rpc,
6587 mode=constants.IALLOCATOR_MODE_ALLOC,
6588 name=self.op.instance_name,
6589 disk_template=self.op.disk_template,
6592 vcpus=self.be_full[constants.BE_VCPUS],
6593 mem_size=self.be_full[constants.BE_MEMORY],
6596 hypervisor=self.op.hypervisor,
6599 ial.Run(self.op.iallocator)
6602 raise errors.OpPrereqError("Can't compute nodes using"
6603 " iallocator '%s': %s" %
6604 (self.op.iallocator, ial.info),
6606 if len(ial.result) != ial.required_nodes:
6607 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6608 " of nodes (%s), required %s" %
6609 (self.op.iallocator, len(ial.result),
6610 ial.required_nodes), errors.ECODE_FAULT)
6611 self.op.pnode = ial.result[0]
6612 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6613 self.op.instance_name, self.op.iallocator,
6614 utils.CommaJoin(ial.result))
6615 if ial.required_nodes == 2:
6616 self.op.snode = ial.result[1]
6618 def BuildHooksEnv(self):
6621 This runs on master, primary and secondary nodes of the instance.
6625 "ADD_MODE": self.op.mode,
6627 if self.op.mode == constants.INSTANCE_IMPORT:
6628 env["SRC_NODE"] = self.op.src_node
6629 env["SRC_PATH"] = self.op.src_path
6630 env["SRC_IMAGES"] = self.src_images
6632 env.update(_BuildInstanceHookEnv(
6633 name=self.op.instance_name,
6634 primary_node=self.op.pnode,
6635 secondary_nodes=self.secondaries,
6636 status=self.op.start,
6637 os_type=self.op.os_type,
6638 memory=self.be_full[constants.BE_MEMORY],
6639 vcpus=self.be_full[constants.BE_VCPUS],
6640 nics=_NICListToTuple(self, self.nics),
6641 disk_template=self.op.disk_template,
6642 disks=[(d["size"], d["mode"]) for d in self.disks],
6645 hypervisor_name=self.op.hypervisor,
6648 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6652 def _ReadExportInfo(self):
6653 """Reads the export information from disk.
6655 It will override the opcode source node and path with the actual
6656 information, if these two were not specified before.
6658 @return: the export information
6661 assert self.op.mode == constants.INSTANCE_IMPORT
6663 src_node = self.op.src_node
6664 src_path = self.op.src_path
6666 if src_node is None:
6667 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6668 exp_list = self.rpc.call_export_list(locked_nodes)
6670 for node in exp_list:
6671 if exp_list[node].fail_msg:
6673 if src_path in exp_list[node].payload:
6675 self.op.src_node = src_node = node
6676 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6680 raise errors.OpPrereqError("No export found for relative path %s" %
6681 src_path, errors.ECODE_INVAL)
6683 _CheckNodeOnline(self, src_node)
6684 result = self.rpc.call_export_info(src_node, src_path)
6685 result.Raise("No export or invalid export found in dir %s" % src_path)
6687 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6688 if not export_info.has_section(constants.INISECT_EXP):
6689 raise errors.ProgrammerError("Corrupted export config",
6690 errors.ECODE_ENVIRON)
6692 ei_version = export_info.get(constants.INISECT_EXP, "version")
6693 if (int(ei_version) != constants.EXPORT_VERSION):
6694 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6695 (ei_version, constants.EXPORT_VERSION),
6696 errors.ECODE_ENVIRON)
6699 def _ReadExportParams(self, einfo):
6700 """Use export parameters as defaults.
6702 In case the opcode doesn't specify (as in override) some instance
6703 parameters, then try to use them from the export information, if
6707 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6709 if self.op.disk_template is None:
6710 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6711 self.op.disk_template = einfo.get(constants.INISECT_INS,
6714 raise errors.OpPrereqError("No disk template specified and the export"
6715 " is missing the disk_template information",
6718 if not self.op.disks:
6719 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6721 # TODO: import the disk iv_name too
6722 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6723 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6724 disks.append({"size": disk_sz})
6725 self.op.disks = disks
6727 raise errors.OpPrereqError("No disk info specified and the export"
6728 " is missing the disk information",
6731 if (not self.op.nics and
6732 einfo.has_option(constants.INISECT_INS, "nic_count")):
6734 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6736 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6737 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6742 if (self.op.hypervisor is None and
6743 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6744 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6745 if einfo.has_section(constants.INISECT_HYP):
6746 # use the export parameters but do not override the ones
6747 # specified by the user
6748 for name, value in einfo.items(constants.INISECT_HYP):
6749 if name not in self.op.hvparams:
6750 self.op.hvparams[name] = value
6752 if einfo.has_section(constants.INISECT_BEP):
6753 # use the parameters, without overriding
6754 for name, value in einfo.items(constants.INISECT_BEP):
6755 if name not in self.op.beparams:
6756 self.op.beparams[name] = value
6758 # try to read the parameters old style, from the main section
6759 for name in constants.BES_PARAMETERS:
6760 if (name not in self.op.beparams and
6761 einfo.has_option(constants.INISECT_INS, name)):
6762 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6764 if einfo.has_section(constants.INISECT_OSP):
6765 # use the parameters, without overriding
6766 for name, value in einfo.items(constants.INISECT_OSP):
6767 if name not in self.op.osparams:
6768 self.op.osparams[name] = value
6770 def _RevertToDefaults(self, cluster):
6771 """Revert the instance parameters to the default values.
6775 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6776 for name in self.op.hvparams.keys():
6777 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6778 del self.op.hvparams[name]
6780 be_defs = cluster.SimpleFillBE({})
6781 for name in self.op.beparams.keys():
6782 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6783 del self.op.beparams[name]
6785 nic_defs = cluster.SimpleFillNIC({})
6786 for nic in self.op.nics:
6787 for name in constants.NICS_PARAMETERS:
6788 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6791 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6792 for name in self.op.osparams.keys():
6793 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6794 del self.op.osparams[name]
6796 def CheckPrereq(self):
6797 """Check prerequisites.
6800 if self.op.mode == constants.INSTANCE_IMPORT:
6801 export_info = self._ReadExportInfo()
6802 self._ReadExportParams(export_info)
6804 _CheckDiskTemplate(self.op.disk_template)
6806 if (not self.cfg.GetVGName() and
6807 self.op.disk_template not in constants.DTS_NOT_LVM):
6808 raise errors.OpPrereqError("Cluster does not support lvm-based"
6809 " instances", errors.ECODE_STATE)
6811 if self.op.hypervisor is None:
6812 self.op.hypervisor = self.cfg.GetHypervisorType()
6814 cluster = self.cfg.GetClusterInfo()
6815 enabled_hvs = cluster.enabled_hypervisors
6816 if self.op.hypervisor not in enabled_hvs:
6817 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6818 " cluster (%s)" % (self.op.hypervisor,
6819 ",".join(enabled_hvs)),
6822 # check hypervisor parameter syntax (locally)
6823 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6824 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6826 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6827 hv_type.CheckParameterSyntax(filled_hvp)
6828 self.hv_full = filled_hvp
6829 # check that we don't specify global parameters on an instance
6830 _CheckGlobalHvParams(self.op.hvparams)
6832 # fill and remember the beparams dict
6833 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6834 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6836 # build os parameters
6837 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6839 # now that hvp/bep are in final format, let's reset to defaults,
6841 if self.op.identify_defaults:
6842 self._RevertToDefaults(cluster)
6846 for idx, nic in enumerate(self.op.nics):
6847 nic_mode_req = nic.get("mode", None)
6848 nic_mode = nic_mode_req
6849 if nic_mode is None:
6850 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6852 # in routed mode, for the first nic, the default ip is 'auto'
6853 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6854 default_ip_mode = constants.VALUE_AUTO
6856 default_ip_mode = constants.VALUE_NONE
6858 # ip validity checks
6859 ip = nic.get("ip", default_ip_mode)
6860 if ip is None or ip.lower() == constants.VALUE_NONE:
6862 elif ip.lower() == constants.VALUE_AUTO:
6863 if not self.op.name_check:
6864 raise errors.OpPrereqError("IP address set to auto but name checks"
6865 " have been skipped. Aborting.",
6867 nic_ip = self.hostname1.ip
6869 if not utils.IsValidIP4(ip):
6870 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6871 " like a valid IP" % ip,
6875 # TODO: check the ip address for uniqueness
6876 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6877 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6880 # MAC address verification
6881 mac = nic.get("mac", constants.VALUE_AUTO)
6882 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6883 mac = utils.NormalizeAndValidateMac(mac)
6886 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6887 except errors.ReservationError:
6888 raise errors.OpPrereqError("MAC address %s already in use"
6889 " in cluster" % mac,
6890 errors.ECODE_NOTUNIQUE)
6892 # bridge verification
6893 bridge = nic.get("bridge", None)
6894 link = nic.get("link", None)
6896 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6897 " at the same time", errors.ECODE_INVAL)
6898 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6899 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6906 nicparams[constants.NIC_MODE] = nic_mode_req
6908 nicparams[constants.NIC_LINK] = link
6910 check_params = cluster.SimpleFillNIC(nicparams)
6911 objects.NIC.CheckParameterSyntax(check_params)
6912 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6914 # disk checks/pre-build
6916 for disk in self.op.disks:
6917 mode = disk.get("mode", constants.DISK_RDWR)
6918 if mode not in constants.DISK_ACCESS_SET:
6919 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6920 mode, errors.ECODE_INVAL)
6921 size = disk.get("size", None)
6923 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6926 except (TypeError, ValueError):
6927 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6929 new_disk = {"size": size, "mode": mode}
6931 new_disk["adopt"] = disk["adopt"]
6932 self.disks.append(new_disk)
6934 if self.op.mode == constants.INSTANCE_IMPORT:
6936 # Check that the new instance doesn't have fewer disks than the export
6937 instance_disks = len(self.disks)
6938 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6939 if instance_disks < export_disks:
6940 raise errors.OpPrereqError("Not enough disks to import."
6941 " (instance: %d, export: %d)" %
6942 (instance_disks, export_disks),
6946 for idx in range(export_disks):
6947 option = 'disk%d_dump' % idx
6948 if export_info.has_option(constants.INISECT_INS, option):
6949 # FIXME: are the old os-es, disk sizes, etc. useful?
6950 export_name = export_info.get(constants.INISECT_INS, option)
6951 image = utils.PathJoin(self.op.src_path, export_name)
6952 disk_images.append(image)
6954 disk_images.append(False)
6956 self.src_images = disk_images
6958 old_name = export_info.get(constants.INISECT_INS, 'name')
6960 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6961 except (TypeError, ValueError), err:
6962 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6963 " an integer: %s" % str(err),
6965 if self.op.instance_name == old_name:
6966 for idx, nic in enumerate(self.nics):
6967 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6968 nic_mac_ini = 'nic%d_mac' % idx
6969 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6971 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6973 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6974 if self.op.ip_check:
6975 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6976 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6977 (self.check_ip, self.op.instance_name),
6978 errors.ECODE_NOTUNIQUE)
6980 #### mac address generation
6981 # By generating the MAC address here, both the allocator and the hooks get
6982 # the real, final MAC address rather than the 'auto' or 'generate' value.
6983 # There is a race condition between the generation and the instance object
6984 # creation, which means that we know the mac is valid now, but we're not
6985 # sure it will be when we actually add the instance. If things go bad
6986 # adding the instance will abort because of a duplicate mac, and the
6987 # creation job will fail.
6988 for nic in self.nics:
6989 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6990 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6994 if self.op.iallocator is not None:
6995 self._RunAllocator()
6997 #### node related checks
6999 # check primary node
7000 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7001 assert self.pnode is not None, \
7002 "Cannot retrieve locked node %s" % self.op.pnode
7004 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7005 pnode.name, errors.ECODE_STATE)
7007 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7008 pnode.name, errors.ECODE_STATE)
7010 self.secondaries = []
7012 # mirror node verification
7013 if self.op.disk_template in constants.DTS_NET_MIRROR:
7014 if self.op.snode is None:
7015 raise errors.OpPrereqError("The networked disk templates need"
7016 " a mirror node", errors.ECODE_INVAL)
7017 if self.op.snode == pnode.name:
7018 raise errors.OpPrereqError("The secondary node cannot be the"
7019 " primary node.", errors.ECODE_INVAL)
7020 _CheckNodeOnline(self, self.op.snode)
7021 _CheckNodeNotDrained(self, self.op.snode)
7022 self.secondaries.append(self.op.snode)
7024 nodenames = [pnode.name] + self.secondaries
7026 req_size = _ComputeDiskSize(self.op.disk_template,
7029 # Check lv size requirements, if not adopting
7030 if req_size is not None and not self.adopt_disks:
7031 _CheckNodesFreeDisk(self, nodenames, req_size)
7033 if self.adopt_disks: # instead, we must check the adoption data
7034 all_lvs = set([i["adopt"] for i in self.disks])
7035 if len(all_lvs) != len(self.disks):
7036 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7038 for lv_name in all_lvs:
7040 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7041 except errors.ReservationError:
7042 raise errors.OpPrereqError("LV named %s used by another instance" %
7043 lv_name, errors.ECODE_NOTUNIQUE)
7045 node_lvs = self.rpc.call_lv_list([pnode.name],
7046 self.cfg.GetVGName())[pnode.name]
7047 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7048 node_lvs = node_lvs.payload
7049 delta = all_lvs.difference(node_lvs.keys())
7051 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7052 utils.CommaJoin(delta),
7054 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7056 raise errors.OpPrereqError("Online logical volumes found, cannot"
7057 " adopt: %s" % utils.CommaJoin(online_lvs),
7059 # update the size of disk based on what is found
7060 for dsk in self.disks:
7061 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7063 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7065 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7066 # check OS parameters (remotely)
7067 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7069 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7071 # memory check on primary node
7073 _CheckNodeFreeMemory(self, self.pnode.name,
7074 "creating instance %s" % self.op.instance_name,
7075 self.be_full[constants.BE_MEMORY],
7078 self.dry_run_result = list(nodenames)
7080 def Exec(self, feedback_fn):
7081 """Create and add the instance to the cluster.
7084 instance = self.op.instance_name
7085 pnode_name = self.pnode.name
7087 ht_kind = self.op.hypervisor
7088 if ht_kind in constants.HTS_REQ_PORT:
7089 network_port = self.cfg.AllocatePort()
7093 if constants.ENABLE_FILE_STORAGE:
7094 # this is needed because os.path.join does not accept None arguments
7095 if self.op.file_storage_dir is None:
7096 string_file_storage_dir = ""
7098 string_file_storage_dir = self.op.file_storage_dir
7100 # build the full file storage dir path
7101 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7102 string_file_storage_dir, instance)
7104 file_storage_dir = ""
7106 disks = _GenerateDiskTemplate(self,
7107 self.op.disk_template,
7108 instance, pnode_name,
7112 self.op.file_driver,
7115 iobj = objects.Instance(name=instance, os=self.op.os_type,
7116 primary_node=pnode_name,
7117 nics=self.nics, disks=disks,
7118 disk_template=self.op.disk_template,
7120 network_port=network_port,
7121 beparams=self.op.beparams,
7122 hvparams=self.op.hvparams,
7123 hypervisor=self.op.hypervisor,
7124 osparams=self.op.osparams,
7127 if self.adopt_disks:
7128 # rename LVs to the newly-generated names; we need to construct
7129 # 'fake' LV disks with the old data, plus the new unique_id
7130 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7132 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7133 rename_to.append(t_dsk.logical_id)
7134 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7135 self.cfg.SetDiskID(t_dsk, pnode_name)
7136 result = self.rpc.call_blockdev_rename(pnode_name,
7137 zip(tmp_disks, rename_to))
7138 result.Raise("Failed to rename adoped LVs")
7140 feedback_fn("* creating instance disks...")
7142 _CreateDisks(self, iobj)
7143 except errors.OpExecError:
7144 self.LogWarning("Device creation failed, reverting...")
7146 _RemoveDisks(self, iobj)
7148 self.cfg.ReleaseDRBDMinors(instance)
7151 feedback_fn("adding instance %s to cluster config" % instance)
7153 self.cfg.AddInstance(iobj, self.proc.GetECId())
7155 # Declare that we don't want to remove the instance lock anymore, as we've
7156 # added the instance to the config
7157 del self.remove_locks[locking.LEVEL_INSTANCE]
7158 # Unlock all the nodes
7159 if self.op.mode == constants.INSTANCE_IMPORT:
7160 nodes_keep = [self.op.src_node]
7161 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7162 if node != self.op.src_node]
7163 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7164 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7166 self.context.glm.release(locking.LEVEL_NODE)
7167 del self.acquired_locks[locking.LEVEL_NODE]
7169 if self.op.wait_for_sync:
7170 disk_abort = not _WaitForSync(self, iobj)
7171 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7172 # make sure the disks are not degraded (still sync-ing is ok)
7174 feedback_fn("* checking mirrors status")
7175 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7180 _RemoveDisks(self, iobj)
7181 self.cfg.RemoveInstance(iobj.name)
7182 # Make sure the instance lock gets removed
7183 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7184 raise errors.OpExecError("There are some degraded disks for"
7187 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7188 if self.op.mode == constants.INSTANCE_CREATE:
7189 if not self.op.no_install:
7190 feedback_fn("* running the instance OS create scripts...")
7191 # FIXME: pass debug option from opcode to backend
7192 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7193 self.op.debug_level)
7194 result.Raise("Could not add os for instance %s"
7195 " on node %s" % (instance, pnode_name))
7197 elif self.op.mode == constants.INSTANCE_IMPORT:
7198 feedback_fn("* running the instance OS import scripts...")
7202 for idx, image in enumerate(self.src_images):
7206 # FIXME: pass debug option from opcode to backend
7207 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7208 constants.IEIO_FILE, (image, ),
7209 constants.IEIO_SCRIPT,
7210 (iobj.disks[idx], idx),
7212 transfers.append(dt)
7215 masterd.instance.TransferInstanceData(self, feedback_fn,
7216 self.op.src_node, pnode_name,
7217 self.pnode.secondary_ip,
7219 if not compat.all(import_result):
7220 self.LogWarning("Some disks for instance %s on node %s were not"
7221 " imported successfully" % (instance, pnode_name))
7223 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7224 feedback_fn("* preparing remote import...")
7225 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7226 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7228 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7229 self.source_x509_ca,
7230 self._cds, timeouts)
7231 if not compat.all(disk_results):
7232 # TODO: Should the instance still be started, even if some disks
7233 # failed to import (valid for local imports, too)?
7234 self.LogWarning("Some disks for instance %s on node %s were not"
7235 " imported successfully" % (instance, pnode_name))
7237 # Run rename script on newly imported instance
7238 assert iobj.name == instance
7239 feedback_fn("Running rename script for %s" % instance)
7240 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7241 self.source_instance_name,
7242 self.op.debug_level)
7244 self.LogWarning("Failed to run rename script for %s on node"
7245 " %s: %s" % (instance, pnode_name, result.fail_msg))
7248 # also checked in the prereq part
7249 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7253 iobj.admin_up = True
7254 self.cfg.Update(iobj, feedback_fn)
7255 logging.info("Starting instance %s on node %s", instance, pnode_name)
7256 feedback_fn("* starting instance...")
7257 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7258 result.Raise("Could not start instance")
7260 return list(iobj.all_nodes)
7263 class LUConnectConsole(NoHooksLU):
7264 """Connect to an instance's console.
7266 This is somewhat special in that it returns the command line that
7267 you need to run on the master node in order to connect to the
7276 def ExpandNames(self):
7277 self._ExpandAndLockInstance()
7279 def CheckPrereq(self):
7280 """Check prerequisites.
7282 This checks that the instance is in the cluster.
7285 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7286 assert self.instance is not None, \
7287 "Cannot retrieve locked instance %s" % self.op.instance_name
7288 _CheckNodeOnline(self, self.instance.primary_node)
7290 def Exec(self, feedback_fn):
7291 """Connect to the console of an instance
7294 instance = self.instance
7295 node = instance.primary_node
7297 node_insts = self.rpc.call_instance_list([node],
7298 [instance.hypervisor])[node]
7299 node_insts.Raise("Can't get node information from %s" % node)
7301 if instance.name not in node_insts.payload:
7302 raise errors.OpExecError("Instance %s is not running." % instance.name)
7304 logging.debug("Connecting to console of %s on %s", instance.name, node)
7306 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7307 cluster = self.cfg.GetClusterInfo()
7308 # beparams and hvparams are passed separately, to avoid editing the
7309 # instance and then saving the defaults in the instance itself.
7310 hvparams = cluster.FillHV(instance)
7311 beparams = cluster.FillBE(instance)
7312 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7315 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7318 class LUReplaceDisks(LogicalUnit):
7319 """Replace the disks of an instance.
7322 HPATH = "mirrors-replace"
7323 HTYPE = constants.HTYPE_INSTANCE
7326 ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)),
7327 ("disks", _EmptyList, _TListOf(_TPositiveInt)),
7328 ("remote_node", None, _TMaybeString),
7329 ("iallocator", None, _TMaybeString),
7330 ("early_release", False, _TBool),
7334 def CheckArguments(self):
7335 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7338 def ExpandNames(self):
7339 self._ExpandAndLockInstance()
7341 if self.op.iallocator is not None:
7342 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7344 elif self.op.remote_node is not None:
7345 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7346 self.op.remote_node = remote_node
7348 # Warning: do not remove the locking of the new secondary here
7349 # unless DRBD8.AddChildren is changed to work in parallel;
7350 # currently it doesn't since parallel invocations of
7351 # FindUnusedMinor will conflict
7352 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7353 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7356 self.needed_locks[locking.LEVEL_NODE] = []
7357 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7359 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7360 self.op.iallocator, self.op.remote_node,
7361 self.op.disks, False, self.op.early_release)
7363 self.tasklets = [self.replacer]
7365 def DeclareLocks(self, level):
7366 # If we're not already locking all nodes in the set we have to declare the
7367 # instance's primary/secondary nodes.
7368 if (level == locking.LEVEL_NODE and
7369 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7370 self._LockInstancesNodes()
7372 def BuildHooksEnv(self):
7375 This runs on the master, the primary and all the secondaries.
7378 instance = self.replacer.instance
7380 "MODE": self.op.mode,
7381 "NEW_SECONDARY": self.op.remote_node,
7382 "OLD_SECONDARY": instance.secondary_nodes[0],
7384 env.update(_BuildInstanceHookEnvByObject(self, instance))
7386 self.cfg.GetMasterNode(),
7387 instance.primary_node,
7389 if self.op.remote_node is not None:
7390 nl.append(self.op.remote_node)
7394 class TLReplaceDisks(Tasklet):
7395 """Replaces disks for an instance.
7397 Note: Locking is not within the scope of this class.
7400 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7401 disks, delay_iallocator, early_release):
7402 """Initializes this class.
7405 Tasklet.__init__(self, lu)
7408 self.instance_name = instance_name
7410 self.iallocator_name = iallocator_name
7411 self.remote_node = remote_node
7413 self.delay_iallocator = delay_iallocator
7414 self.early_release = early_release
7417 self.instance = None
7418 self.new_node = None
7419 self.target_node = None
7420 self.other_node = None
7421 self.remote_node_info = None
7422 self.node_secondary_ip = None
7425 def CheckArguments(mode, remote_node, iallocator):
7426 """Helper function for users of this class.
7429 # check for valid parameter combination
7430 if mode == constants.REPLACE_DISK_CHG:
7431 if remote_node is None and iallocator is None:
7432 raise errors.OpPrereqError("When changing the secondary either an"
7433 " iallocator script must be used or the"
7434 " new node given", errors.ECODE_INVAL)
7436 if remote_node is not None and iallocator is not None:
7437 raise errors.OpPrereqError("Give either the iallocator or the new"
7438 " secondary, not both", errors.ECODE_INVAL)
7440 elif remote_node is not None or iallocator is not None:
7441 # Not replacing the secondary
7442 raise errors.OpPrereqError("The iallocator and new node options can"
7443 " only be used when changing the"
7444 " secondary node", errors.ECODE_INVAL)
7447 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7448 """Compute a new secondary node using an IAllocator.
7451 ial = IAllocator(lu.cfg, lu.rpc,
7452 mode=constants.IALLOCATOR_MODE_RELOC,
7454 relocate_from=relocate_from)
7456 ial.Run(iallocator_name)
7459 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7460 " %s" % (iallocator_name, ial.info),
7463 if len(ial.result) != ial.required_nodes:
7464 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7465 " of nodes (%s), required %s" %
7467 len(ial.result), ial.required_nodes),
7470 remote_node_name = ial.result[0]
7472 lu.LogInfo("Selected new secondary for instance '%s': %s",
7473 instance_name, remote_node_name)
7475 return remote_node_name
7477 def _FindFaultyDisks(self, node_name):
7478 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7481 def CheckPrereq(self):
7482 """Check prerequisites.
7484 This checks that the instance is in the cluster.
7487 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7488 assert instance is not None, \
7489 "Cannot retrieve locked instance %s" % self.instance_name
7491 if instance.disk_template != constants.DT_DRBD8:
7492 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7493 " instances", errors.ECODE_INVAL)
7495 if len(instance.secondary_nodes) != 1:
7496 raise errors.OpPrereqError("The instance has a strange layout,"
7497 " expected one secondary but found %d" %
7498 len(instance.secondary_nodes),
7501 if not self.delay_iallocator:
7502 self._CheckPrereq2()
7504 def _CheckPrereq2(self):
7505 """Check prerequisites, second part.
7507 This function should always be part of CheckPrereq. It was separated and is
7508 now called from Exec because during node evacuation iallocator was only
7509 called with an unmodified cluster model, not taking planned changes into
7513 instance = self.instance
7514 secondary_node = instance.secondary_nodes[0]
7516 if self.iallocator_name is None:
7517 remote_node = self.remote_node
7519 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7520 instance.name, instance.secondary_nodes)
7522 if remote_node is not None:
7523 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7524 assert self.remote_node_info is not None, \
7525 "Cannot retrieve locked node %s" % remote_node
7527 self.remote_node_info = None
7529 if remote_node == self.instance.primary_node:
7530 raise errors.OpPrereqError("The specified node is the primary node of"
7531 " the instance.", errors.ECODE_INVAL)
7533 if remote_node == secondary_node:
7534 raise errors.OpPrereqError("The specified node is already the"
7535 " secondary node of the instance.",
7538 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7539 constants.REPLACE_DISK_CHG):
7540 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7543 if self.mode == constants.REPLACE_DISK_AUTO:
7544 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7545 faulty_secondary = self._FindFaultyDisks(secondary_node)
7547 if faulty_primary and faulty_secondary:
7548 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7549 " one node and can not be repaired"
7550 " automatically" % self.instance_name,
7554 self.disks = faulty_primary
7555 self.target_node = instance.primary_node
7556 self.other_node = secondary_node
7557 check_nodes = [self.target_node, self.other_node]
7558 elif faulty_secondary:
7559 self.disks = faulty_secondary
7560 self.target_node = secondary_node
7561 self.other_node = instance.primary_node
7562 check_nodes = [self.target_node, self.other_node]
7568 # Non-automatic modes
7569 if self.mode == constants.REPLACE_DISK_PRI:
7570 self.target_node = instance.primary_node
7571 self.other_node = secondary_node
7572 check_nodes = [self.target_node, self.other_node]
7574 elif self.mode == constants.REPLACE_DISK_SEC:
7575 self.target_node = secondary_node
7576 self.other_node = instance.primary_node
7577 check_nodes = [self.target_node, self.other_node]
7579 elif self.mode == constants.REPLACE_DISK_CHG:
7580 self.new_node = remote_node
7581 self.other_node = instance.primary_node
7582 self.target_node = secondary_node
7583 check_nodes = [self.new_node, self.other_node]
7585 _CheckNodeNotDrained(self.lu, remote_node)
7587 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7588 assert old_node_info is not None
7589 if old_node_info.offline and not self.early_release:
7590 # doesn't make sense to delay the release
7591 self.early_release = True
7592 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7593 " early-release mode", secondary_node)
7596 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7599 # If not specified all disks should be replaced
7601 self.disks = range(len(self.instance.disks))
7603 for node in check_nodes:
7604 _CheckNodeOnline(self.lu, node)
7606 # Check whether disks are valid
7607 for disk_idx in self.disks:
7608 instance.FindDisk(disk_idx)
7610 # Get secondary node IP addresses
7613 for node_name in [self.target_node, self.other_node, self.new_node]:
7614 if node_name is not None:
7615 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7617 self.node_secondary_ip = node_2nd_ip
7619 def Exec(self, feedback_fn):
7620 """Execute disk replacement.
7622 This dispatches the disk replacement to the appropriate handler.
7625 if self.delay_iallocator:
7626 self._CheckPrereq2()
7629 feedback_fn("No disks need replacement")
7632 feedback_fn("Replacing disk(s) %s for %s" %
7633 (utils.CommaJoin(self.disks), self.instance.name))
7635 activate_disks = (not self.instance.admin_up)
7637 # Activate the instance disks if we're replacing them on a down instance
7639 _StartInstanceDisks(self.lu, self.instance, True)
7642 # Should we replace the secondary node?
7643 if self.new_node is not None:
7644 fn = self._ExecDrbd8Secondary
7646 fn = self._ExecDrbd8DiskOnly
7648 return fn(feedback_fn)
7651 # Deactivate the instance disks if we're replacing them on a
7654 _SafeShutdownInstanceDisks(self.lu, self.instance)
7656 def _CheckVolumeGroup(self, nodes):
7657 self.lu.LogInfo("Checking volume groups")
7659 vgname = self.cfg.GetVGName()
7661 # Make sure volume group exists on all involved nodes
7662 results = self.rpc.call_vg_list(nodes)
7664 raise errors.OpExecError("Can't list volume groups on the nodes")
7668 res.Raise("Error checking node %s" % node)
7669 if vgname not in res.payload:
7670 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7673 def _CheckDisksExistence(self, nodes):
7674 # Check disk existence
7675 for idx, dev in enumerate(self.instance.disks):
7676 if idx not in self.disks:
7680 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7681 self.cfg.SetDiskID(dev, node)
7683 result = self.rpc.call_blockdev_find(node, dev)
7685 msg = result.fail_msg
7686 if msg or not result.payload:
7688 msg = "disk not found"
7689 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7692 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7693 for idx, dev in enumerate(self.instance.disks):
7694 if idx not in self.disks:
7697 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7700 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7702 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7703 " replace disks for instance %s" %
7704 (node_name, self.instance.name))
7706 def _CreateNewStorage(self, node_name):
7707 vgname = self.cfg.GetVGName()
7710 for idx, dev in enumerate(self.instance.disks):
7711 if idx not in self.disks:
7714 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7716 self.cfg.SetDiskID(dev, node_name)
7718 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7719 names = _GenerateUniqueNames(self.lu, lv_names)
7721 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7722 logical_id=(vgname, names[0]))
7723 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7724 logical_id=(vgname, names[1]))
7726 new_lvs = [lv_data, lv_meta]
7727 old_lvs = dev.children
7728 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7730 # we pass force_create=True to force the LVM creation
7731 for new_lv in new_lvs:
7732 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7733 _GetInstanceInfoText(self.instance), False)
7737 def _CheckDevices(self, node_name, iv_names):
7738 for name, (dev, _, _) in iv_names.iteritems():
7739 self.cfg.SetDiskID(dev, node_name)
7741 result = self.rpc.call_blockdev_find(node_name, dev)
7743 msg = result.fail_msg
7744 if msg or not result.payload:
7746 msg = "disk not found"
7747 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7750 if result.payload.is_degraded:
7751 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7753 def _RemoveOldStorage(self, node_name, iv_names):
7754 for name, (_, old_lvs, _) in iv_names.iteritems():
7755 self.lu.LogInfo("Remove logical volumes for %s" % name)
7758 self.cfg.SetDiskID(lv, node_name)
7760 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7762 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7763 hint="remove unused LVs manually")
7765 def _ReleaseNodeLock(self, node_name):
7766 """Releases the lock for a given node."""
7767 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7769 def _ExecDrbd8DiskOnly(self, feedback_fn):
7770 """Replace a disk on the primary or secondary for DRBD 8.
7772 The algorithm for replace is quite complicated:
7774 1. for each disk to be replaced:
7776 1. create new LVs on the target node with unique names
7777 1. detach old LVs from the drbd device
7778 1. rename old LVs to name_replaced.<time_t>
7779 1. rename new LVs to old LVs
7780 1. attach the new LVs (with the old names now) to the drbd device
7782 1. wait for sync across all devices
7784 1. for each modified disk:
7786 1. remove old LVs (which have the name name_replaced.<time_t>)
7788 Failures are not very well handled.
7793 # Step: check device activation
7794 self.lu.LogStep(1, steps_total, "Check device existence")
7795 self._CheckDisksExistence([self.other_node, self.target_node])
7796 self._CheckVolumeGroup([self.target_node, self.other_node])
7798 # Step: check other node consistency
7799 self.lu.LogStep(2, steps_total, "Check peer consistency")
7800 self._CheckDisksConsistency(self.other_node,
7801 self.other_node == self.instance.primary_node,
7804 # Step: create new storage
7805 self.lu.LogStep(3, steps_total, "Allocate new storage")
7806 iv_names = self._CreateNewStorage(self.target_node)
7808 # Step: for each lv, detach+rename*2+attach
7809 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7810 for dev, old_lvs, new_lvs in iv_names.itervalues():
7811 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7813 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7815 result.Raise("Can't detach drbd from local storage on node"
7816 " %s for device %s" % (self.target_node, dev.iv_name))
7818 #cfg.Update(instance)
7820 # ok, we created the new LVs, so now we know we have the needed
7821 # storage; as such, we proceed on the target node to rename
7822 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7823 # using the assumption that logical_id == physical_id (which in
7824 # turn is the unique_id on that node)
7826 # FIXME(iustin): use a better name for the replaced LVs
7827 temp_suffix = int(time.time())
7828 ren_fn = lambda d, suff: (d.physical_id[0],
7829 d.physical_id[1] + "_replaced-%s" % suff)
7831 # Build the rename list based on what LVs exist on the node
7832 rename_old_to_new = []
7833 for to_ren in old_lvs:
7834 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7835 if not result.fail_msg and result.payload:
7837 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7839 self.lu.LogInfo("Renaming the old LVs on the target node")
7840 result = self.rpc.call_blockdev_rename(self.target_node,
7842 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7844 # Now we rename the new LVs to the old LVs
7845 self.lu.LogInfo("Renaming the new LVs on the target node")
7846 rename_new_to_old = [(new, old.physical_id)
7847 for old, new in zip(old_lvs, new_lvs)]
7848 result = self.rpc.call_blockdev_rename(self.target_node,
7850 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7852 for old, new in zip(old_lvs, new_lvs):
7853 new.logical_id = old.logical_id
7854 self.cfg.SetDiskID(new, self.target_node)
7856 for disk in old_lvs:
7857 disk.logical_id = ren_fn(disk, temp_suffix)
7858 self.cfg.SetDiskID(disk, self.target_node)
7860 # Now that the new lvs have the old name, we can add them to the device
7861 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7862 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7864 msg = result.fail_msg
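# adding the new LVs as children failed; try to remove them again before aborting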
7866 for new_lv in new_lvs:
7867 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7870 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7871 hint=("cleanup manually the unused logical"
7873 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7875 dev.children = new_lvs
7877 self.cfg.Update(self.instance, feedback_fn)
7880 if self.early_release:
7881 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7883 self._RemoveOldStorage(self.target_node, iv_names)
7884 # WARNING: we release both node locks here, do not do other RPCs
7885 # than WaitForSync to the primary node
7886 self._ReleaseNodeLock([self.target_node, self.other_node])
7889 # This can fail as the old devices are degraded and _WaitForSync
7890 # does a combined result over all disks, so we don't check its return value
7891 self.lu.LogStep(cstep, steps_total, "Sync devices")
7893 _WaitForSync(self.lu, self.instance)
7895 # Check all devices manually
7896 self._CheckDevices(self.instance.primary_node, iv_names)
7898 # Step: remove old storage
7899 if not self.early_release:
7900 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7902 self._RemoveOldStorage(self.target_node, iv_names)
7904 def _ExecDrbd8Secondary(self, feedback_fn):
7905 """Replace the secondary node for DRBD 8.
7907 The algorithm for replace is quite complicated:
7908 - for all disks of the instance:
7909 - create new LVs on the new node with same names
7910 - shutdown the drbd device on the old secondary
7911 - disconnect the drbd network on the primary
7912 - create the drbd device on the new secondary
7913 - network attach the drbd on the primary, using an artifice:
7914 the drbd code for Attach() will connect to the network if it
7915 finds a device which is connected to the good local disks but not network enabled
7917 - wait for sync across all devices
7918 - remove all disks from the old secondary
7920 Failures are not very well handled.
7925 # Step: check device activation
7926 self.lu.LogStep(1, steps_total, "Check device existence")
7927 self._CheckDisksExistence([self.instance.primary_node])
7928 self._CheckVolumeGroup([self.instance.primary_node])
7930 # Step: check other node consistency
7931 self.lu.LogStep(2, steps_total, "Check peer consistency")
7932 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7934 # Step: create new storage
7935 self.lu.LogStep(3, steps_total, "Allocate new storage")
7936 for idx, dev in enumerate(self.instance.disks):
7937 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7938 (self.new_node, idx))
7939 # we pass force_create=True to force LVM creation
7940 for new_lv in dev.children:
7941 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7942 _GetInstanceInfoText(self.instance), False)
7944 # Step 4: drbd minors and drbd setup changes
7945 # after this, we must manually remove the drbd minors on both the
7946 # error and the success paths
7947 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7948 minors = self.cfg.AllocateDRBDMinor([self.new_node
7949 for dev in self.instance.disks],
7951 logging.debug("Allocated minors %r", minors)
7954 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7955 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7956 (self.new_node, idx))
7957 # create new devices on new_node; note that we create two IDs:
7958 # one without port, so the drbd will be activated without
7959 # networking information on the new node at this stage, and one
7960 # with network, for the latter activation in step 4
7961 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
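# figure out which of the two configured minors belongs to the primary node;
# the other end is the old secondary that is being replaced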
7962 if self.instance.primary_node == o_node1:
7965 assert self.instance.primary_node == o_node2, "Three-node instance?"
7968 new_alone_id = (self.instance.primary_node, self.new_node, None,
7969 p_minor, new_minor, o_secret)
7970 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7971 p_minor, new_minor, o_secret)
7973 iv_names[idx] = (dev, dev.children, new_net_id)
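# remember the device, its current children and the future (networked)
# logical_id; these are needed later to update the instance configuration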
7974 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7976 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7977 logical_id=new_alone_id,
7978 children=dev.children,
7981 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7982 _GetInstanceInfoText(self.instance), False)
7983 except errors.GenericError:
7984 self.cfg.ReleaseDRBDMinors(self.instance.name)
7987 # We have new devices, shutdown the drbd on the old secondary
7988 for idx, dev in enumerate(self.instance.disks):
7989 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7990 self.cfg.SetDiskID(dev, self.target_node)
7991 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7993 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7994 "node: %s" % (idx, msg),
7995 hint=("Please cleanup this device manually as"
7996 " soon as possible"))
7998 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7999 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8000 self.node_secondary_ip,
8001 self.instance.disks)\
8002 [self.instance.primary_node]
8004 msg = result.fail_msg
8006 # detaches didn't succeed (unlikely)
8007 self.cfg.ReleaseDRBDMinors(self.instance.name)
8008 raise errors.OpExecError("Can't detach the disks from the network on"
8009 " old node: %s" % (msg,))
8011 # if we managed to detach at least one, we update all the disks of
8012 # the instance to point to the new secondary
8013 self.lu.LogInfo("Updating instance configuration")
8014 for dev, _, new_logical_id in iv_names.itervalues():
8015 dev.logical_id = new_logical_id
8016 self.cfg.SetDiskID(dev, self.instance.primary_node)
8018 self.cfg.Update(self.instance, feedback_fn)
8020 # and now perform the drbd attach
8021 self.lu.LogInfo("Attaching primary drbds to new secondary"
8022 " (standalone => connected)")
8023 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8025 self.node_secondary_ip,
8026 self.instance.disks,
8029 for to_node, to_result in result.items():
8030 msg = to_result.fail_msg
8032 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8034 hint=("please do a gnt-instance info to see the"
8035 " status of disks"))
8037 if self.early_release:
8038 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8040 self._RemoveOldStorage(self.target_node, iv_names)
8041 # WARNING: we release all node locks here, do not do other RPCs
8042 # than WaitForSync to the primary node
8043 self._ReleaseNodeLock([self.instance.primary_node,
8048 # This can fail as the old devices are degraded and _WaitForSync
8049 # does a combined result over all disks, so we don't check its return value
8050 self.lu.LogStep(cstep, steps_total, "Sync devices")
8052 _WaitForSync(self.lu, self.instance)
8054 # Check all devices manually
8055 self._CheckDevices(self.instance.primary_node, iv_names)
8057 # Step: remove old storage
8058 if not self.early_release:
8059 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8060 self._RemoveOldStorage(self.target_node, iv_names)
8063 class LURepairNodeStorage(NoHooksLU):
8064 """Repairs the volume group on a node.
8069 ("storage_type", _NoDefault, _CheckStorageType),
8070 ("name", _NoDefault, _TNonEmptyString),
8071 ("ignore_consistency", False, _TBool),
8075 def CheckArguments(self):
8076 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8078 storage_type = self.op.storage_type
8080 if (constants.SO_FIX_CONSISTENCY not in
8081 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8082 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8083 " repaired" % storage_type,
8086 def ExpandNames(self):
8087 self.needed_locks = {
8088 locking.LEVEL_NODE: [self.op.node_name],
8091 def _CheckFaultyDisks(self, instance, node_name):
8092 """Ensure faulty disks abort the opcode or at least warn."""
8094 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8096 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8097 " node '%s'" % (instance.name, node_name),
8099 except errors.OpPrereqError, err:
8100 if self.op.ignore_consistency:
8101 self.proc.LogWarning(str(err.args[0]))
8105 def CheckPrereq(self):
8106 """Check prerequisites.
8109 # Check whether any instance on this node has faulty disks
8110 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8111 if not inst.admin_up:
8113 check_nodes = set(inst.all_nodes)
8114 check_nodes.discard(self.op.node_name)
8115 for inst_node_name in check_nodes:
8116 self._CheckFaultyDisks(inst, inst_node_name)
8118 def Exec(self, feedback_fn):
8119 feedback_fn("Repairing storage unit '%s' on %s ..." %
8120 (self.op.name, self.op.node_name))
8122 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8123 result = self.rpc.call_storage_execute(self.op.node_name,
8124 self.op.storage_type, st_args,
8126 constants.SO_FIX_CONSISTENCY)
8127 result.Raise("Failed to repair storage unit '%s' on %s" %
8128 (self.op.name, self.op.node_name))
8131 class LUNodeEvacuationStrategy(NoHooksLU):
8132 """Computes the node evacuation strategy.
8136 ("nodes", _NoDefault, _TListOf(_TNonEmptyString)),
8137 ("remote_node", None, _TMaybeString),
8138 ("iallocator", None, _TMaybeString),
8142 def CheckArguments(self):
8143 if self.op.remote_node is not None and self.op.iallocator is not None:
8144 raise errors.OpPrereqError("Give either the iallocator or the new"
8145 " secondary, not both", errors.ECODE_INVAL)
8147 def ExpandNames(self):
8148 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8149 self.needed_locks = locks = {}
8150 if self.op.remote_node is None:
8151 locks[locking.LEVEL_NODE] = locking.ALL_SET
8153 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8154 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8156 def Exec(self, feedback_fn):
8157 if self.op.remote_node is not None:
8159 for node in self.op.nodes:
8160 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8163 if i.primary_node == self.op.remote_node:
8164 raise errors.OpPrereqError("Node %s is the primary node of"
8165 " instance %s, cannot use it as"
8167 (self.op.remote_node, i.name),
8169 result.append([i.name, self.op.remote_node])
8171 ial = IAllocator(self.cfg, self.rpc,
8172 mode=constants.IALLOCATOR_MODE_MEVAC,
8173 evac_nodes=self.op.nodes)
8174 ial.Run(self.op.iallocator, validate=True)
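# fail if the allocator did not produce a usable evacuation plan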
8176 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8182 class LUGrowDisk(LogicalUnit):
8183 """Grow a disk of an instance.
8187 HTYPE = constants.HTYPE_INSTANCE
8190 ("disk", _NoDefault, _TInt),
8191 ("amount", _NoDefault, _TInt),
8192 ("wait_for_sync", True, _TBool),
8196 def ExpandNames(self):
8197 self._ExpandAndLockInstance()
8198 self.needed_locks[locking.LEVEL_NODE] = []
8199 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8201 def DeclareLocks(self, level):
8202 if level == locking.LEVEL_NODE:
8203 self._LockInstancesNodes()
8205 def BuildHooksEnv(self):
8208 This runs on the master, the primary and all the secondaries.
8212 "DISK": self.op.disk,
8213 "AMOUNT": self.op.amount,
8215 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8216 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8219 def CheckPrereq(self):
8220 """Check prerequisites.
8222 This checks that the instance is in the cluster.
8225 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8226 assert instance is not None, \
8227 "Cannot retrieve locked instance %s" % self.op.instance_name
8228 nodenames = list(instance.all_nodes)
8229 for node in nodenames:
8230 _CheckNodeOnline(self, node)
8232 self.instance = instance
8234 if instance.disk_template not in constants.DTS_GROWABLE:
8235 raise errors.OpPrereqError("Instance's disk layout does not support"
8236 " growing.", errors.ECODE_INVAL)
8238 self.disk = instance.FindDisk(self.op.disk)
8240 if instance.disk_template != constants.DT_FILE:
8241 # TODO: check the free disk space for file, when that feature will be
8243 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8245 def Exec(self, feedback_fn):
8246 """Execute disk grow.
8249 instance = self.instance
8252 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8254 raise errors.OpExecError("Cannot activate block device to grow")
8256 for node in instance.all_nodes:
8257 self.cfg.SetDiskID(disk, node)
8258 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8259 result.Raise("Grow request failed to node %s" % node)
8261 # TODO: Rewrite code to work properly
8262 # DRBD goes into sync mode for a short amount of time after executing the
8263 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8264 # calling "resize" in sync mode fails. Sleeping for a short amount of
8265 # time is a work-around.
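# the physical devices have been grown; record the new size in the
# configuration and persist it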
8268 disk.RecordGrow(self.op.amount)
8269 self.cfg.Update(instance, feedback_fn)
8270 if self.op.wait_for_sync:
8271 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8273 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8274 " status.\nPlease check the instance.")
8275 if not instance.admin_up:
8276 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8277 elif not instance.admin_up:
8278 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8279 " not supposed to be running because no wait for"
8280 " sync mode was requested.")
8283 class LUQueryInstanceData(NoHooksLU):
8284 """Query runtime instance data.
8288 ("instances", _EmptyList, _TListOf(_TNonEmptyString)),
8289 ("static", False, _TBool),
8293 def ExpandNames(self):
8294 self.needed_locks = {}
8295 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
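# this is a read-only query, so all locks are acquired in shared mode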
8297 if self.op.instances:
8298 self.wanted_names = []
8299 for name in self.op.instances:
8300 full_name = _ExpandInstanceName(self.cfg, name)
8301 self.wanted_names.append(full_name)
8302 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8304 self.wanted_names = None
8305 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8307 self.needed_locks[locking.LEVEL_NODE] = []
8308 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8310 def DeclareLocks(self, level):
8311 if level == locking.LEVEL_NODE:
8312 self._LockInstancesNodes()
8314 def CheckPrereq(self):
8315 """Check prerequisites.
8317 This only checks the optional instance list against the existing names.
8320 if self.wanted_names is None:
8321 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8323 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8324 in self.wanted_names]
8326 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8327 """Returns the status of a block device
8330 if self.op.static or not node:
8333 self.cfg.SetDiskID(dev, node)
8335 result = self.rpc.call_blockdev_find(node, dev)
8339 result.Raise("Can't compute disk status for %s" % instance_name)
8341 status = result.payload
8345 return (status.dev_path, status.major, status.minor,
8346 status.sync_percent, status.estimated_time,
8347 status.is_degraded, status.ldisk_status)
8349 def _ComputeDiskStatus(self, instance, snode, dev):
8350 """Compute block device status.
8353 if dev.dev_type in constants.LDS_DRBD:
8354 # we change the snode then (otherwise we use the one passed in)
8355 if dev.logical_id[0] == instance.primary_node:
8356 snode = dev.logical_id[1]
8358 snode = dev.logical_id[0]
8360 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8362 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8365 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8366 for child in dev.children]
8371 "iv_name": dev.iv_name,
8372 "dev_type": dev.dev_type,
8373 "logical_id": dev.logical_id,
8374 "physical_id": dev.physical_id,
8375 "pstatus": dev_pstatus,
8376 "sstatus": dev_sstatus,
8377 "children": dev_children,
8384 def Exec(self, feedback_fn):
8385 """Gather and return data"""
8388 cluster = self.cfg.GetClusterInfo()
8390 for instance in self.wanted_instances:
8391 if not self.op.static:
8392 remote_info = self.rpc.call_instance_info(instance.primary_node,
8394 instance.hypervisor)
8395 remote_info.Raise("Error checking node %s" % instance.primary_node)
8396 remote_info = remote_info.payload
8397 if remote_info and "state" in remote_info:
8400 remote_state = "down"
8403 if instance.admin_up:
8406 config_state = "down"
8408 disks = [self._ComputeDiskStatus(instance, None, device)
8409 for device in instance.disks]
8412 "name": instance.name,
8413 "config_state": config_state,
8414 "run_state": remote_state,
8415 "pnode": instance.primary_node,
8416 "snodes": instance.secondary_nodes,
8418 # this happens to be the same format used for hooks
8419 "nics": _NICListToTuple(self, instance.nics),
8420 "disk_template": instance.disk_template,
8422 "hypervisor": instance.hypervisor,
8423 "network_port": instance.network_port,
8424 "hv_instance": instance.hvparams,
8425 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8426 "be_instance": instance.beparams,
8427 "be_actual": cluster.FillBE(instance),
8428 "os_instance": instance.osparams,
8429 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8430 "serial_no": instance.serial_no,
8431 "mtime": instance.mtime,
8432 "ctime": instance.ctime,
8433 "uuid": instance.uuid,
8436 result[instance.name] = idict
8441 class LUSetInstanceParams(LogicalUnit):
8442 """Modifies an instances's parameters.
8445 HPATH = "instance-modify"
8446 HTYPE = constants.HTYPE_INSTANCE
8449 ("nics", _EmptyList, _TList),
8450 ("disks", _EmptyList, _TList),
8451 ("beparams", _EmptyDict, _TDict),
8452 ("hvparams", _EmptyDict, _TDict),
8453 ("disk_template", None, _TMaybeString),
8454 ("remote_node", None, _TMaybeString),
8455 ("os_name", None, _TMaybeString),
8456 ("force_variant", False, _TBool),
8457 ("osparams", None, _TOr(_TDict, _TNone)),
8462 def CheckArguments(self):
8463 if not (self.op.nics or self.op.disks or self.op.disk_template or
8464 self.op.hvparams or self.op.beparams or self.op.os_name):
8465 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8467 if self.op.hvparams:
8468 _CheckGlobalHvParams(self.op.hvparams)
8472 for disk_op, disk_dict in self.op.disks:
8473 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8474 if disk_op == constants.DDM_REMOVE:
8477 elif disk_op == constants.DDM_ADD:
8480 if not isinstance(disk_op, int):
8481 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8482 if not isinstance(disk_dict, dict):
8483 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8484 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8486 if disk_op == constants.DDM_ADD:
8487 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8488 if mode not in constants.DISK_ACCESS_SET:
8489 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8491 size = disk_dict.get('size', None)
8493 raise errors.OpPrereqError("Required disk parameter size missing",
8497 except (TypeError, ValueError), err:
8498 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8499 str(err), errors.ECODE_INVAL)
8500 disk_dict['size'] = size
8502 # modification of disk
8503 if 'size' in disk_dict:
8504 raise errors.OpPrereqError("Disk size change not possible, use"
8505 " grow-disk", errors.ECODE_INVAL)
8507 if disk_addremove > 1:
8508 raise errors.OpPrereqError("Only one disk add or remove operation"
8509 " supported at a time", errors.ECODE_INVAL)
8511 if self.op.disks and self.op.disk_template is not None:
8512 raise errors.OpPrereqError("Disk template conversion and other disk"
8513 " changes not supported at the same time",
8516 if self.op.disk_template:
8517 _CheckDiskTemplate(self.op.disk_template)
8518 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8519 self.op.remote_node is None):
8520 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8521 " one requires specifying a secondary node",
8526 for nic_op, nic_dict in self.op.nics:
8527 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8528 if nic_op == constants.DDM_REMOVE:
8531 elif nic_op == constants.DDM_ADD:
8534 if not isinstance(nic_op, int):
8535 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8536 if not isinstance(nic_dict, dict):
8537 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8538 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8540 # nic_dict should be a dict
8541 nic_ip = nic_dict.get('ip', None)
8542 if nic_ip is not None:
8543 if nic_ip.lower() == constants.VALUE_NONE:
8544 nic_dict['ip'] = None
8546 if not utils.IsValidIP4(nic_ip):
8547 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8550 nic_bridge = nic_dict.get('bridge', None)
8551 nic_link = nic_dict.get('link', None)
8552 if nic_bridge and nic_link:
8553 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8554 " at the same time", errors.ECODE_INVAL)
8555 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8556 nic_dict['bridge'] = None
8557 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8558 nic_dict['link'] = None
8560 if nic_op == constants.DDM_ADD:
8561 nic_mac = nic_dict.get('mac', None)
8563 nic_dict['mac'] = constants.VALUE_AUTO
8565 if 'mac' in nic_dict:
8566 nic_mac = nic_dict['mac']
8567 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8568 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8570 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8571 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8572 " modifying an existing nic",
8575 if nic_addremove > 1:
8576 raise errors.OpPrereqError("Only one NIC add or remove operation"
8577 " supported at a time", errors.ECODE_INVAL)
8579 def ExpandNames(self):
8580 self._ExpandAndLockInstance()
8581 self.needed_locks[locking.LEVEL_NODE] = []
8582 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8584 def DeclareLocks(self, level):
8585 if level == locking.LEVEL_NODE:
8586 self._LockInstancesNodes()
8587 if self.op.disk_template and self.op.remote_node:
8588 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8589 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8591 def BuildHooksEnv(self):
8594 This runs on the master, primary and secondaries.
8598 if constants.BE_MEMORY in self.be_new:
8599 args['memory'] = self.be_new[constants.BE_MEMORY]
8600 if constants.BE_VCPUS in self.be_new:
8601 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8602 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8603 # information at all.
8606 nic_override = dict(self.op.nics)
8607 for idx, nic in enumerate(self.instance.nics):
8608 if idx in nic_override:
8609 this_nic_override = nic_override[idx]
8611 this_nic_override = {}
8612 if 'ip' in this_nic_override:
8613 ip = this_nic_override['ip']
8616 if 'mac' in this_nic_override:
8617 mac = this_nic_override['mac']
8620 if idx in self.nic_pnew:
8621 nicparams = self.nic_pnew[idx]
8623 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8624 mode = nicparams[constants.NIC_MODE]
8625 link = nicparams[constants.NIC_LINK]
8626 args['nics'].append((ip, mac, mode, link))
8627 if constants.DDM_ADD in nic_override:
8628 ip = nic_override[constants.DDM_ADD].get('ip', None)
8629 mac = nic_override[constants.DDM_ADD]['mac']
8630 nicparams = self.nic_pnew[constants.DDM_ADD]
8631 mode = nicparams[constants.NIC_MODE]
8632 link = nicparams[constants.NIC_LINK]
8633 args['nics'].append((ip, mac, mode, link))
8634 elif constants.DDM_REMOVE in nic_override:
8635 del args['nics'][-1]
8637 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8638 if self.op.disk_template:
8639 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8640 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8643 def CheckPrereq(self):
8644 """Check prerequisites.
8646 This only checks the instance list against the existing names.
8649 # checking the new params on the primary/secondary nodes
8651 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8652 cluster = self.cluster = self.cfg.GetClusterInfo()
8653 assert self.instance is not None, \
8654 "Cannot retrieve locked instance %s" % self.op.instance_name
8655 pnode = instance.primary_node
8656 nodelist = list(instance.all_nodes)
8659 if self.op.os_name and not self.op.force:
8660 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8661 self.op.force_variant)
8662 instance_os = self.op.os_name
8664 instance_os = instance.os
8666 if self.op.disk_template:
8667 if instance.disk_template == self.op.disk_template:
8668 raise errors.OpPrereqError("Instance already has disk template %s" %
8669 instance.disk_template, errors.ECODE_INVAL)
8671 if (instance.disk_template,
8672 self.op.disk_template) not in self._DISK_CONVERSIONS:
8673 raise errors.OpPrereqError("Unsupported disk template conversion from"
8674 " %s to %s" % (instance.disk_template,
8675 self.op.disk_template),
8677 _CheckInstanceDown(self, instance, "cannot change disk template")
8678 if self.op.disk_template in constants.DTS_NET_MIRROR:
8679 _CheckNodeOnline(self, self.op.remote_node)
8680 _CheckNodeNotDrained(self, self.op.remote_node)
8681 disks = [{"size": d.size} for d in instance.disks]
8682 required = _ComputeDiskSize(self.op.disk_template, disks)
8683 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8685 # hvparams processing
8686 if self.op.hvparams:
8687 hv_type = instance.hypervisor
8688 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8689 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8690 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8693 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8694 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8695 self.hv_new = hv_new # the new actual values
8696 self.hv_inst = i_hvdict # the new dict (without defaults)
8698 self.hv_new = self.hv_inst = {}
8700 # beparams processing
8701 if self.op.beparams:
8702 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8704 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8705 be_new = cluster.SimpleFillBE(i_bedict)
8706 self.be_new = be_new # the new actual values
8707 self.be_inst = i_bedict # the new dict (without defaults)
8709 self.be_new = self.be_inst = {}
8711 # osparams processing
8712 if self.op.osparams:
8713 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8714 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8715 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8716 self.os_inst = i_osdict # the new dict (without defaults)
8718 self.os_new = self.os_inst = {}
8722 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8723 mem_check_list = [pnode]
8724 if be_new[constants.BE_AUTO_BALANCE]:
8725 # either we changed auto_balance to yes or it was from before
8726 mem_check_list.extend(instance.secondary_nodes)
8727 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8728 instance.hypervisor)
8729 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8730 instance.hypervisor)
8731 pninfo = nodeinfo[pnode]
8732 msg = pninfo.fail_msg
8734 # Assume the primary node is unreachable and go ahead
8735 self.warn.append("Can't get info from primary node %s: %s" %
8737 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8738 self.warn.append("Node data from primary node %s doesn't contain"
8739 " free memory information" % pnode)
8740 elif instance_info.fail_msg:
8741 self.warn.append("Can't get instance runtime information: %s" %
8742 instance_info.fail_msg)
8744 if instance_info.payload:
8745 current_mem = int(instance_info.payload['memory'])
8747 # Assume instance not running
8748 # (there is a slight race condition here, but it's not very probable,
8749 # and we have no other way to check)
8751 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8752 pninfo.payload['memory_free'])
8754 raise errors.OpPrereqError("This change will prevent the instance"
8755 " from starting, due to %d MB of memory"
8756 " missing on its primary node" % miss_mem,
8759 if be_new[constants.BE_AUTO_BALANCE]:
8760 for node, nres in nodeinfo.items():
8761 if node not in instance.secondary_nodes:
8765 self.warn.append("Can't get info from secondary node %s: %s" %
8767 elif not isinstance(nres.payload.get('memory_free', None), int):
8768 self.warn.append("Secondary node %s didn't return free"
8769 " memory information" % node)
8770 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8771 self.warn.append("Not enough memory to failover instance to"
8772 " secondary node %s" % node)
8777 for nic_op, nic_dict in self.op.nics:
8778 if nic_op == constants.DDM_REMOVE:
8779 if not instance.nics:
8780 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8783 if nic_op != constants.DDM_ADD:
8785 if not instance.nics:
8786 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8787 " no NICs" % nic_op,
8789 if nic_op < 0 or nic_op >= len(instance.nics):
8790 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8792 (nic_op, len(instance.nics) - 1),
8794 old_nic_params = instance.nics[nic_op].nicparams
8795 old_nic_ip = instance.nics[nic_op].ip
8800 update_params_dict = dict([(key, nic_dict[key])
8801 for key in constants.NICS_PARAMETERS
8802 if key in nic_dict])
8804 if 'bridge' in nic_dict:
8805 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8807 new_nic_params = _GetUpdatedParams(old_nic_params,
8809 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8810 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8811 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8812 self.nic_pinst[nic_op] = new_nic_params
8813 self.nic_pnew[nic_op] = new_filled_nic_params
8814 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8816 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8817 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8818 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8820 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8822 self.warn.append(msg)
8824 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8825 if new_nic_mode == constants.NIC_MODE_ROUTED:
8826 if 'ip' in nic_dict:
8827 nic_ip = nic_dict['ip']
8831 raise errors.OpPrereqError('Cannot set the nic ip to None'
8832 ' on a routed nic', errors.ECODE_INVAL)
8833 if 'mac' in nic_dict:
8834 nic_mac = nic_dict['mac']
8836 raise errors.OpPrereqError('Cannot set the nic mac to None',
8838 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8839 # otherwise generate the mac
8840 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8842 # or validate/reserve the current one
8844 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8845 except errors.ReservationError:
8846 raise errors.OpPrereqError("MAC address %s already in use"
8847 " in cluster" % nic_mac,
8848 errors.ECODE_NOTUNIQUE)
8851 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8852 raise errors.OpPrereqError("Disk operations not supported for"
8853 " diskless instances",
8855 for disk_op, _ in self.op.disks:
8856 if disk_op == constants.DDM_REMOVE:
8857 if len(instance.disks) == 1:
8858 raise errors.OpPrereqError("Cannot remove the last disk of"
8859 " an instance", errors.ECODE_INVAL)
8860 _CheckInstanceDown(self, instance, "cannot remove disks")
8862 if (disk_op == constants.DDM_ADD and
8863 len(instance.disks) >= constants.MAX_DISKS):
8864 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8865 " add more" % constants.MAX_DISKS,
8867 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8869 if disk_op < 0 or disk_op >= len(instance.disks):
8870 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8872 (disk_op, len(instance.disks) - 1),
8877 def _ConvertPlainToDrbd(self, feedback_fn):
8878 """Converts an instance from plain to drbd.
8881 feedback_fn("Converting template to drbd")
8882 instance = self.instance
8883 pnode = instance.primary_node
8884 snode = self.op.remote_node
8886 # create a fake disk info for _GenerateDiskTemplate
8887 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8888 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8889 instance.name, pnode, [snode],
8890 disk_info, None, None, 0)
8891 info = _GetInstanceInfoText(instance)
8892 feedback_fn("Creating aditional volumes...")
8893 # first, create the missing data and meta devices
8894 for disk in new_disks:
8895 # unfortunately this is... not too nice
8896 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8898 for child in disk.children:
8899 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8900 # at this stage, all new LVs have been created, we can rename the
8902 feedback_fn("Renaming original volumes...")
8903 rename_list = [(o, n.children[0].logical_id)
8904 for (o, n) in zip(instance.disks, new_disks)]
8905 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8906 result.Raise("Failed to rename original LVs")
8908 feedback_fn("Initializing DRBD devices...")
8909 # all child devices are in place, we can now create the DRBD devices
8910 for disk in new_disks:
8911 for node in [pnode, snode]:
8912 f_create = node == pnode
8913 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8915 # at this point, the instance has been modified
8916 instance.disk_template = constants.DT_DRBD8
8917 instance.disks = new_disks
8918 self.cfg.Update(instance, feedback_fn)
8920 # disks are created, waiting for sync
8921 disk_abort = not _WaitForSync(self, instance)
8923 raise errors.OpExecError("There are some degraded disks for"
8924 " this instance, please cleanup manually")
8926 def _ConvertDrbdToPlain(self, feedback_fn):
8927 """Converts an instance from drbd to plain.
8930 instance = self.instance
8931 assert len(instance.secondary_nodes) == 1
8932 pnode = instance.primary_node
8933 snode = instance.secondary_nodes[0]
8934 feedback_fn("Converting template to plain")
8936 old_disks = instance.disks
8937 new_disks = [d.children[0] for d in old_disks]
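# for DRBD8 disks the first child is the data LV, which becomes the plain disk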
8939 # copy over size and mode
8940 for parent, child in zip(old_disks, new_disks):
8941 child.size = parent.size
8942 child.mode = parent.mode
8944 # update instance structure
8945 instance.disks = new_disks
8946 instance.disk_template = constants.DT_PLAIN
8947 self.cfg.Update(instance, feedback_fn)
8949 feedback_fn("Removing volumes on the secondary node...")
8950 for disk in old_disks:
8951 self.cfg.SetDiskID(disk, snode)
8952 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8954 self.LogWarning("Could not remove block device %s on node %s,"
8955 " continuing anyway: %s", disk.iv_name, snode, msg)
8957 feedback_fn("Removing unneeded volumes on the primary node...")
8958 for idx, disk in enumerate(old_disks):
8959 meta = disk.children[1]
8960 self.cfg.SetDiskID(meta, pnode)
8961 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8963 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8964 " continuing anyway: %s", idx, pnode, msg)
8967 def Exec(self, feedback_fn):
8968 """Modifies an instance.
8970 All parameters take effect only at the next restart of the instance.
8973 # Process here the warnings from CheckPrereq, as we don't have a
8974 # feedback_fn there.
8975 for warn in self.warn:
8976 feedback_fn("WARNING: %s" % warn)
8979 instance = self.instance
8981 for disk_op, disk_dict in self.op.disks:
8982 if disk_op == constants.DDM_REMOVE:
8983 # remove the last disk
8984 device = instance.disks.pop()
8985 device_idx = len(instance.disks)
8986 for node, disk in device.ComputeNodeTree(instance.primary_node):
8987 self.cfg.SetDiskID(disk, node)
8988 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8990 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8991 " continuing anyway", device_idx, node, msg)
8992 result.append(("disk/%d" % device_idx, "remove"))
8993 elif disk_op == constants.DDM_ADD:
8995 if instance.disk_template == constants.DT_FILE:
8996 file_driver, file_path = instance.disks[0].logical_id
8997 file_path = os.path.dirname(file_path)
8999 file_driver = file_path = None
9000 disk_idx_base = len(instance.disks)
9001 new_disk = _GenerateDiskTemplate(self,
9002 instance.disk_template,
9003 instance.name, instance.primary_node,
9004 instance.secondary_nodes,
9009 instance.disks.append(new_disk)
9010 info = _GetInstanceInfoText(instance)
9012 logging.info("Creating volume %s for instance %s",
9013 new_disk.iv_name, instance.name)
9014 # Note: this needs to be kept in sync with _CreateDisks
9016 for node in instance.all_nodes:
9017 f_create = node == instance.primary_node
9019 _CreateBlockDev(self, node, instance, new_disk,
9020 f_create, info, f_create)
9021 except errors.OpExecError, err:
9022 self.LogWarning("Failed to create volume %s (%s) on"
9024 new_disk.iv_name, new_disk, node, err)
9025 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9026 (new_disk.size, new_disk.mode)))
9028 # change a given disk
9029 instance.disks[disk_op].mode = disk_dict['mode']
9030 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9032 if self.op.disk_template:
9033 r_shut = _ShutdownInstanceDisks(self, instance)
9035 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9036 " proceed with disk template conversion")
9037 mode = (instance.disk_template, self.op.disk_template)
9039 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9041 self.cfg.ReleaseDRBDMinors(instance.name)
9043 result.append(("disk_template", self.op.disk_template))
9046 for nic_op, nic_dict in self.op.nics:
9047 if nic_op == constants.DDM_REMOVE:
9048 # remove the last nic
9049 del instance.nics[-1]
9050 result.append(("nic.%d" % len(instance.nics), "remove"))
9051 elif nic_op == constants.DDM_ADD:
9052 # mac and bridge should be set by now
9053 mac = nic_dict['mac']
9054 ip = nic_dict.get('ip', None)
9055 nicparams = self.nic_pinst[constants.DDM_ADD]
9056 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9057 instance.nics.append(new_nic)
9058 result.append(("nic.%d" % (len(instance.nics) - 1),
9059 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9060 (new_nic.mac, new_nic.ip,
9061 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9062 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9065 for key in 'mac', 'ip':
9067 setattr(instance.nics[nic_op], key, nic_dict[key])
9068 if nic_op in self.nic_pinst:
9069 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9070 for key, val in nic_dict.iteritems():
9071 result.append(("nic.%s/%d" % (key, nic_op), val))
9074 if self.op.hvparams:
9075 instance.hvparams = self.hv_inst
9076 for key, val in self.op.hvparams.iteritems():
9077 result.append(("hv/%s" % key, val))
9080 if self.op.beparams:
9081 instance.beparams = self.be_inst
9082 for key, val in self.op.beparams.iteritems():
9083 result.append(("be/%s" % key, val))
9087 instance.os = self.op.os_name
9090 if self.op.osparams:
9091 instance.osparams = self.os_inst
9092 for key, val in self.op.osparams.iteritems():
9093 result.append(("os/%s" % key, val))
9095 self.cfg.Update(instance, feedback_fn)
9099 _DISK_CONVERSIONS = {
9100 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9101 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9105 class LUQueryExports(NoHooksLU):
9106 """Query the exports list
9110 ("nodes", _EmptyList, _TListOf(_TNonEmptyString)),
9111 ("use_locking", False, _TBool),
9115 def ExpandNames(self):
9116 self.needed_locks = {}
9117 self.share_locks[locking.LEVEL_NODE] = 1
9118 if not self.op.nodes:
9119 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9121 self.needed_locks[locking.LEVEL_NODE] = \
9122 _GetWantedNodes(self, self.op.nodes)
9124 def Exec(self, feedback_fn):
9125 """Compute the list of all the exported system images.
9128 @return: a dictionary with the structure node->(export-list)
9129 where export-list is a list of the instances exported on
9133 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9134 rpcresult = self.rpc.call_export_list(self.nodes)
9136 for node in rpcresult:
9137 if rpcresult[node].fail_msg:
9138 result[node] = False
9140 result[node] = rpcresult[node].payload
9145 class LUPrepareExport(NoHooksLU):
9146 """Prepares an instance for an export and returns useful information.
9151 ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)),
9155 def ExpandNames(self):
9156 self._ExpandAndLockInstance()
9158 def CheckPrereq(self):
9159 """Check prerequisites.
9162 instance_name = self.op.instance_name
9164 self.instance = self.cfg.GetInstanceInfo(instance_name)
9165 assert self.instance is not None, \
9166 "Cannot retrieve locked instance %s" % self.op.instance_name
9167 _CheckNodeOnline(self, self.instance.primary_node)
9169 self._cds = _GetClusterDomainSecret()
9171 def Exec(self, feedback_fn):
9172 """Prepares an instance for an export.
9175 instance = self.instance
9177 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9178 salt = utils.GenerateSecret(8)
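# random salt used when signing the X509 key name and the CA with the
# cluster domain secret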
9180 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9181 result = self.rpc.call_x509_cert_create(instance.primary_node,
9182 constants.RIE_CERT_VALIDITY)
9183 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9185 (name, cert_pem) = result.payload
9187 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9191 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9192 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9194 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9200 class LUExportInstance(LogicalUnit):
9201 """Export an instance to an image in the cluster.
9204 HPATH = "instance-export"
9205 HTYPE = constants.HTYPE_INSTANCE
9208 ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)),
9209 ("shutdown", True, _TBool),
9211 ("remove_instance", False, _TBool),
9212 ("ignore_remove_failures", False, _TBool),
9213 ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)),
9214 ("x509_key_name", None, _TOr(_TList, _TNone)),
9215 ("destination_x509_ca", None, _TMaybeString),
9219 def CheckArguments(self):
9220 """Check the arguments.
9223 self.x509_key_name = self.op.x509_key_name
9224 self.dest_x509_ca_pem = self.op.destination_x509_ca
9226 if self.op.remove_instance and not self.op.shutdown:
9227 raise errors.OpPrereqError("Can not remove instance without shutting it"
9230 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9231 if not self.x509_key_name:
9232 raise errors.OpPrereqError("Missing X509 key name for encryption",
9235 if not self.dest_x509_ca_pem:
9236 raise errors.OpPrereqError("Missing destination X509 CA",
9239 def ExpandNames(self):
9240 self._ExpandAndLockInstance()
9242 # Lock all nodes for local exports
9243 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9244 # FIXME: lock only instance primary and destination node
9246 # Sad but true, for now we have to lock all nodes, as we don't know where
9247 # the previous export might be, and in this LU we search for it and
9248 # remove it from its current node. In the future we could fix this by:
9249 # - making a tasklet to search (share-lock all), then create the
9250 # new one, then one to remove, after
9251 # - removing the removal operation altogether
9252 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9254 def DeclareLocks(self, level):
9255 """Last minute lock declaration."""
9256 # All nodes are locked anyway, so nothing to do here.
9258 def BuildHooksEnv(self):
9261 This will run on the master, primary node and target node.
9265 "EXPORT_MODE": self.op.mode,
9266 "EXPORT_NODE": self.op.target_node,
9267 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9268 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9269 # TODO: Generic function for boolean env variables
9270 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9273 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9275 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9277 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9278 nl.append(self.op.target_node)
9282 def CheckPrereq(self):
9283 """Check prerequisites.
9285 This checks that the instance and node names are valid.
9288 instance_name = self.op.instance_name
9290 self.instance = self.cfg.GetInstanceInfo(instance_name)
9291 assert self.instance is not None, \
9292 "Cannot retrieve locked instance %s" % self.op.instance_name
9293 _CheckNodeOnline(self, self.instance.primary_node)
9295 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9296 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9297 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9298 assert self.dst_node is not None
9300 _CheckNodeOnline(self, self.dst_node.name)
9301 _CheckNodeNotDrained(self, self.dst_node.name)
9304 self.dest_disk_info = None
9305 self.dest_x509_ca = None
9307 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9308 self.dst_node = None
9310 if len(self.op.target_node) != len(self.instance.disks):
9311 raise errors.OpPrereqError(("Received destination information for %s"
9312 " disks, but instance %s has %s disks") %
9313 (len(self.op.target_node), instance_name,
9314 len(self.instance.disks)),
9317 cds = _GetClusterDomainSecret()
9319 # Check X509 key name
9321 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9322 except (TypeError, ValueError), err:
9323 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9325 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9326 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9329 # Load and verify CA
9331 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9332 except OpenSSL.crypto.Error, err:
9333 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9334 (err, ), errors.ECODE_INVAL)
9336 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9337 if errcode is not None:
9338 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9339 (msg, ), errors.ECODE_INVAL)
9341 self.dest_x509_ca = cert
9343 # Verify target information
9345 for idx, disk_data in enumerate(self.op.target_node):
9347 (host, port, magic) = \
9348 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9349 except errors.GenericError, err:
9350 raise errors.OpPrereqError("Target info for disk %s: %s" %
9351 (idx, err), errors.ECODE_INVAL)
9353 disk_info.append((host, port, magic))
9355 assert len(disk_info) == len(self.op.target_node)
9356 self.dest_disk_info = disk_info
9359 raise errors.ProgrammerError("Unhandled export mode %r" %
9362 # instance disk type verification
9363 # TODO: Implement export support for file-based disks
9364 for disk in self.instance.disks:
9365 if disk.dev_type == constants.LD_FILE:
9366 raise errors.OpPrereqError("Export not supported for instances with"
9367 " file-based disks", errors.ECODE_INVAL)
9369 def _CleanupExports(self, feedback_fn):
9370 """Removes exports of current instance from all other nodes.
9372 If an instance in a cluster with nodes A..D was exported to node C, its
9373 exports will be removed from the nodes A, B and D.
9376 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9378 nodelist = self.cfg.GetNodeList()
9379 nodelist.remove(self.dst_node.name)
9381 # on one-node clusters nodelist will be empty after the removal
9382 # if we proceed the backup would be removed because OpQueryExports
9383 # substitutes an empty list with the full cluster node list.
9384 iname = self.instance.name
9386 feedback_fn("Removing old exports for instance %s" % iname)
9387 exportlist = self.rpc.call_export_list(nodelist)
9388 for node in exportlist:
9389 if exportlist[node].fail_msg:
9391 if iname in exportlist[node].payload:
9392 msg = self.rpc.call_export_remove(node, iname).fail_msg
9394 self.LogWarning("Could not remove older export for instance %s"
9395 " on node %s: %s", iname, node, msg)
9397 def Exec(self, feedback_fn):
9398 """Export an instance to an image in the cluster.
9401 assert self.op.mode in constants.EXPORT_MODES
9403 instance = self.instance
9404 src_node = instance.primary_node
9406 if self.op.shutdown:
9407 # shutdown the instance, but not the disks
9408 feedback_fn("Shutting down instance %s" % instance.name)
9409 result = self.rpc.call_instance_shutdown(src_node, instance,
9410 self.op.shutdown_timeout)
9411 # TODO: Maybe ignore failures if ignore_remove_failures is set
9412 result.Raise("Could not shutdown instance %s on"
9413 " node %s" % (instance.name, src_node))
9415 # set the disks ID correctly since call_instance_start needs the
9416 # correct drbd minor to create the symlinks
9417 for disk in instance.disks:
9418 self.cfg.SetDiskID(disk, src_node)
9420 activate_disks = (not instance.admin_up)
9423 # Activate the instance disks if we're exporting a stopped instance
9424 feedback_fn("Activating disks for %s" % instance.name)
9425 _StartInstanceDisks(self, instance, None)
9428 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9431 helper.CreateSnapshots()
9433 if (self.op.shutdown and instance.admin_up and
9434 not self.op.remove_instance):
9435 assert not activate_disks
9436 feedback_fn("Starting instance %s" % instance.name)
9437 result = self.rpc.call_instance_start(src_node, instance, None, None)
9438 msg = result.fail_msg
9440 feedback_fn("Failed to start instance: %s" % msg)
9441 _ShutdownInstanceDisks(self, instance)
9442 raise errors.OpExecError("Could not start instance: %s" % msg)
9444 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9445 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9446 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9447 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9448 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9450 (key_name, _, _) = self.x509_key_name
9453 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9456 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9457 key_name, dest_ca_pem,
9462 # Check for backwards compatibility
9463 assert len(dresults) == len(instance.disks)
9464 assert compat.all(isinstance(i, bool) for i in dresults), \
9465 "Not all results are boolean: %r" % dresults
9469 feedback_fn("Deactivating disks for %s" % instance.name)
9470 _ShutdownInstanceDisks(self, instance)
9472 # Remove instance if requested
9473 if self.op.remove_instance:
      if not (compat.all(dresults) and fin_resu):
        feedback_fn("Not removing instance %s as parts of the export failed" %
                    instance.name)
      else:
        feedback_fn("Removing instance %s" % instance.name)
        _RemoveInstance(self, feedback_fn, instance,
                        self.op.ignore_remove_failures)
9482 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9483 self._CleanupExports(feedback_fn)
9485 return fin_resu, dresults
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    ("instance_name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False
9497 def ExpandNames(self):
9498 self.needed_locks = {}
9499 # We need all nodes to be locked in order for RemoveExport to work, but we
9500 # don't need to lock the instance itself, as nothing will happen to it (and
9501 # we can remove exports also for a removed instance)
9502 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9504 def Exec(self, feedback_fn):
9505 """Remove any export.
9508 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9509 # If the instance was not found we'll try with the name that was passed in.
9510 # This will only work if it was an FQDN, though.
9512 if not instance_name:
9514 instance_name = self.op.instance_name
9516 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9517 exportlist = self.rpc.call_export_list(locked_nodes)
9519 for node in exportlist:
9520 msg = exportlist[node].fail_msg
9522 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9524 if instance_name in exportlist[node].payload:
9526 result = self.rpc.call_export_remove(node, instance_name)
9527 msg = result.fail_msg
9529 logging.error("Could not remove export for instance %s"
9530 " on node %s: %s", instance_name, node, msg)
9532 if fqdn_warn and not found:
9533 feedback_fn("Export not found. If trying to remove an export belonging"
9534 " to a deleted instance please use its Fully Qualified"
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
9545 def ExpandNames(self):
9546 self.needed_locks = {}
9547 if self.op.kind == constants.TAG_NODE:
9548 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9549 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9550 elif self.op.kind == constants.TAG_INSTANCE:
9551 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9552 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
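    # Cluster tags need no additional locking, hence there is no branch for
    # constants.TAG_CLUSTER here; CheckPrereq below still handles that kind.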
9554 def CheckPrereq(self):
9555 """Check prerequisites.
9558 if self.op.kind == constants.TAG_CLUSTER:
9559 self.target = self.cfg.GetClusterInfo()
9560 elif self.op.kind == constants.TAG_NODE:
9561 self.target = self.cfg.GetNodeInfo(self.op.name)
9562 elif self.op.kind == constants.TAG_INSTANCE:
9563 self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False
9579 def Exec(self, feedback_fn):
9580 """Returns the tag list.
9583 return list(self.target.GetTags())
class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_PARAMS = [
    ("pattern", _NoDefault, _TNonEmptyString),
    ]
  REQ_BGL = False
9595 def ExpandNames(self):
9596 self.needed_locks = {}
9598 def CheckPrereq(self):
9599 """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
9607 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9608 (self.op.pattern, err), errors.ECODE_INVAL)
9610 def Exec(self, feedback_fn):
9611 """Returns the tag list.
9615 tgts = [("/cluster", cfg.GetClusterInfo())]
9616 ilist = cfg.GetAllInstancesInfo().values()
9617 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9618 nlist = cfg.GetAllNodesInfo().values()
9619 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9621 for path, target in tgts:
9622 for tag in target.GetTags():
9623 if self.re.search(tag):
9624 results.append((path, tag))
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
9639 def CheckPrereq(self):
9640 """Check prerequisites.
9642 This checks the type and length of the tag name and value.
9645 TagsLU.CheckPrereq(self)
9646 for tag in self.op.tags:
9647 objects.TaggableObject.ValidateTag(tag)
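    # validating the tags here lets CheckPrereq reject malformed input before
    # Exec touches the target object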
  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
9658 self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_PARAMS = [
    ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _NoDefault, _TNonEmptyString),
    ("tags", _NoDefault, _TListOf(_TNonEmptyString)),
    ]
  REQ_BGL = False
9672 def CheckPrereq(self):
9673 """Check prerequisites.
9675 This checks that we have the given tag.
9678 TagsLU.CheckPrereq(self)
9679 for tag in self.op.tags:
9680 objects.TaggableObject.ValidateTag(tag)
9681 del_tags = frozenset(self.op.tags)
9682 cur_tags = self.target.GetTags()
9683 if not del_tags <= cur_tags:
9684 diff_tags = del_tags - cur_tags
9685 diff_names = ["'%s'" % tag for tag in diff_tags]
9687 raise errors.OpPrereqError("Tag(s) %s not found" %
9688 (",".join(diff_names)), errors.ECODE_NOENT)
9690 def Exec(self, feedback_fn):
9691 """Remove the tag from the object.
9694 for tag in self.op.tags:
9695 self.target.RemoveTag(tag)
9696 self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_PARAMS = [
    ("duration", _NoDefault, _TFloat),
    ("on_master", True, _TBool),
    ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)),
    ("repeat", 0, _TPositiveInt),
    ]
  REQ_BGL = False
9714 def ExpandNames(self):
9715 """Expand names and set required locks.
9717 This expands the node list, if any.
9720 self.needed_locks = {}
9721 if self.op.on_nodes:
9722 # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9726 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9728 def _TestDelay(self):
9729 """Do the actual sleep.
9732 if self.op.on_master:
9733 if not utils.TestDelay(self.op.duration):
9734 raise errors.OpExecError("Error during master delay test")
9735 if self.op.on_nodes:
9736 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9737 for node, node_result in result.items():
9738 node_result.Raise("Failure during rpc call to node %s" % node)
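        # a single failing node aborts the whole delay test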
9740 def Exec(self, feedback_fn):
9741 """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
9753 class IAllocator(object):
9754 """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
9766 # pylint: disable-msg=R0902
9767 # lots of instance attributes
9769 "name", "mem_size", "disks", "disk_template",
9770 "os", "tags", "nics", "vcpus", "hypervisor",
9773 "name", "relocate_from",
  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
9794 # init result fields
9795 self.success = self.info = self.result = None
9796 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9797 keyset = self._ALLO_KEYS
9798 fn = self._AddNewInstance
9799 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9800 keyset = self._RELO_KEYS
9801 fn = self._AddRelocateInstance
9802 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9803 keyset = self._EVAC_KEYS
9804 fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

    self._BuildInputData(fn)
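    # at this point in_data and in_text are fully populated and the object is
    # ready for Run()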
9820 def _ComputeClusterData(self):
9821 """Compute the generic allocator input data.
9823 This is the data that is independent of the actual operation.
9827 cluster_info = cfg.GetClusterInfo()
9830 "version": constants.IALLOCATOR_VERSION,
9831 "cluster_name": cfg.GetClusterName(),
9832 "cluster_tags": list(cluster_info.GetTags()),
9833 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9834 # we don't have job IDs
9836 iinfo = cfg.GetAllInstancesInfo().values()
9837 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
9855 for nname, nresult in node_data.items():
9856 # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
9868 nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
9873 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9874 'vg_size', 'vg_free', 'cpu_total']:
9875 if attr not in remote_info:
9876 raise errors.OpExecError("Node '%s' didn't return attribute"
9877 " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr

    data["nodes"] = node_results
    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
9977 def _AddRelocateInstance(self):
9978 """Add relocate instance data to allocator structure.
9980 This in combination with _IAllocatorGetClusterData will create the
9981 correct structure needed as input for the allocator.
9983 The checks for the completeness of the opcode must have already been
9987 instance = self.cfg.GetInstanceInfo(self.name)
9988 if instance is None:
9989 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9990 " IAllocator" % self.name)
9992 if instance.disk_template not in constants.DTS_NET_MIRROR:
9993 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9996 if len(instance.secondary_nodes) != 1:
9997 raise errors.OpPrereqError("Instance has not exactly one secondary node",
10000 self.required_nodes = 1
10001 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10002 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10006 "disk_space_total": disk_space,
10007 "required_nodes": self.required_nodes,
10008 "relocate_from": self.relocate_from,
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {"evac_nodes": self.evac_nodes}
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
10033 def Run(self, name, validate=True, call_fn=None):
10034 """Run an instance allocator and return the results.
10037 if call_fn is None:
10038 call_fn = self.rpc.call_iallocator_runner
10040 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10041 result.Raise("Failure while running the iallocator script")
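    # on success, the payload is the raw text output of the iallocator script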
    self.out_text = result.payload
    if validate:
      self._ValidateResult()
10047 def _ValidateResult(self):
10048 """Process the allocator results.
10050 This will process and if successful save the result in
10051 self.out_data and the other parameters.
10055 rdict = serializer.Load(self.out_text)
10056 except Exception, err:
10057 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10059 if not isinstance(rdict, dict):
10060 raise errors.OpExecError("Can't parse iallocator results: not a dict")
    # TODO: remove backwards compatibility in later versions
10063 if "nodes" in rdict and "result" not in rdict:
10064 rdict["result"] = rdict["nodes"]
10067 for key in "success", "info", "result":
10068 if key not in rdict:
10069 raise errors.OpExecError("Can't parse iallocator results:"
10070 " missing key '%s'" % key)
10071 setattr(self, key, rdict[key])
    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")

    self.out_data = rdict
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  _OP_PARAMS = [
    ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10087 ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10088 ("name", _NoDefault, _TNonEmptyString),
10089 ("nics", _NoDefault, _TOr(_TNone, _TListOf(
10090 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10091 _TOr(_TNone, _TNonEmptyString))))),
10092 ("disks", _NoDefault, _TOr(_TNone, _TList)),
10093 ("hypervisor", None, _TMaybeString),
10094 ("allocator", None, _TMaybeString),
10095 ("tags", _EmptyList, _TListOf(_TNonEmptyString)),
10096 ("mem_size", None, _TOr(_TNone, _TPositiveInt)),
10097 ("vcpus", None, _TOr(_TNone, _TPositiveInt)),
10098 ("os", None, _TMaybeString),
10099 ("disk_template", None, _TMaybeString),
10100 ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))),
10103 def CheckPrereq(self):
10104 """Check prerequisites.
    This checks the opcode parameters depending on the test direction and mode.

    """
10109 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10110 for attr in ["mem_size", "disks", "disk_template",
10111 "os", "tags", "nics", "vcpus"]:
10112 if not hasattr(self.op, attr):
10113 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10114 attr, errors.ECODE_INVAL)
10115 iname = self.cfg.ExpandInstanceName(self.op.name)
10116 if iname is not None:
10117 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10118 iname, errors.ECODE_EXISTS)
10119 if not isinstance(self.op.nics, list):
10120 raise errors.OpPrereqError("Invalid parameter 'nics'",
10121 errors.ECODE_INVAL)
10122 if not isinstance(self.op.disks, list):
10123 raise errors.OpPrereqError("Invalid parameter 'disks'",
10124 errors.ECODE_INVAL)
10125 for row in self.op.disks:
10126 if (not isinstance(row, dict) or
10127 "size" not in row or
10128 not isinstance(row["size"], int) or
10129 "mode" not in row or
10130 row["mode"] not in ['r', 'w']):
10131 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10132 " parameter", errors.ECODE_INVAL)
10133 if self.op.hypervisor is None:
10134 self.op.hypervisor = self.cfg.GetHypervisorType()
10135 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10136 fname = _ExpandInstanceName(self.cfg, self.op.name)
10137 self.op.name = fname
10138 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10139 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10140 if not hasattr(self.op, "evac_nodes"):
10141 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10142 " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)
10147 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10148 if self.op.allocator is None:
10149 raise errors.OpPrereqError("Missing allocator name",
10150 errors.ECODE_INVAL)
10151 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10152 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10153 self.op.direction, errors.ECODE_INVAL)
10155 def Exec(self, feedback_fn):
10156 """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text