# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

import copy
import logging
import time

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Modifiable default values; need to define these here before the
# actual LUs
60 """Returns an empty list.
67 """Returns an empty dict.
75 """Checks if the given value is not None.
78 return val is not None
82 """Checks if the given value is None.
89 """Checks if the given value is a boolean.
92 return isinstance(val, bool)
96 """Checks if the given value is an integer.
99 return isinstance(val, int)
103 """Checks if the given value is a float.
106 return isinstance(val, float)
110 """Checks if the given value is a string.
113 return isinstance(val, basestring)
117 """Checks if a given value evaluates to a boolean True value.

def _TElemOf(target_list):
  """Builds a function that checks if a given value is a member of a list.

  """
  return lambda val: val in target_list
132 """Checks if the given value is a list.
135 return isinstance(val, list)
139 """Checks if the given value is a dictionary.
142 return isinstance(val, dict)
147 """Combine multiple functions using an AND operation.
151 return compat.all(t(val) for t in args)
156 """Combine multiple functions using an AND operation.
160 return compat.any(t(val) for t in args)

# Non-empty string
_TNEString = _TAnd(_TString, _TTrue)
# Non-negative integer
_TPInt = _TAnd(_TInt, lambda v: v >= 0)

def _TListOf(my_type):
  """Checks if a given value is a list with all elements of the same type.

  """
  return _TAnd(_TList,
               lambda lst: compat.all(lst, my_type))


def _TDictOf(key_type, val_type):
  """Checks a dict type for the type of its key/values.

  """
  return _TAnd(_TDict,
               lambda my_dict: (compat.all(my_dict.keys(), key_type) and
                                compat.all(my_dict.values(), val_type)))
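
# A small usage sketch of the combinators above; the name _TSampleDict
# is illustrative only and not used elsewhere in this module.
_TSampleDict = _TDictOf(_TNEString, _TListOf(_TPInt))
assert _TSampleDict({"node1": [0, 2]})       # non-empty keys, int lists
assert not _TSampleDict({"": [1]})           # empty string key fails
assert not _TSampleDict({"node1": [0, -2]})  # negative element fails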


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_DEFS: a list of opcode attributes and the default values
      they should get if not already existing

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  _OP_DEFS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    self.tasklets = None

    # Process opcode defaults: attributes listed in _OP_DEFS get their
    # default value if the opcode doesn't define them already
    for aname, aval in self._OP_DEFS:
      if not hasattr(self.op, aname):
        if callable(aval):
          dval = aval()
        else:
          dval = aval
        setattr(self.op, aname, dval)

    # Validate all required opcode parameters against their type checks
    for attr_name, test in self._OP_REQP:
      if not hasattr(op, attr_name):
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)
      attr_val = getattr(op, attr_name, None)
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (attr_name, test))
      if not test(attr_val):
        raise errors.OpPrereqError("Parameter '%s' has invalid type" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()
277 """Returns the SshRunner object
281 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
284 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_'; that
    prefix will be added by the hooks runner. Also note that additional
    keys will be added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    An absence of nodes should be expressed as an empty list (and not
    None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # "could be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instances' nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
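
# A minimal sketch of how a concrete LU ties the above together
# (illustrative only: the opcode name and parameters below are invented
# for this example and do not exist in Ganeti):
#
#   class LUExampleQueryNodes(LogicalUnit):
#     HPATH = None
#     HTYPE = None
#     _OP_REQP = [("nodes", _TListOf(_TNEString)),
#                 ("use_locking", _TBool)]
#     _OP_DEFS = [("use_locking", False)]
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.op.nodes = _GetWantedNodes(self, self.op.nodes)
#       self.needed_locks = {locking.LEVEL_NODE: self.op.nodes}
#
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       return self.op.nodes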


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
539 """Tasklet base class.
541 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
542 they can mix legacy code with tasklets. Locking needs to be done in the LU,
543 tasklets know nothing about locks.
545 Subclasses must follow these rules:
546 - Implement CheckPrereq

  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names; must not be empty
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
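
  # For example (values illustrative):
  #
  #   _GetUpdatedParams({"mem": 512, "vcpus": 2},
  #                     {"mem": constants.VALUE_DEFAULT, "acpi": True})
  #   => {"vcpus": 2, "acpi": True}
  #
  # "mem" is removed so it reverts to its default, and "acpi" is added.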


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
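
  # Worked example (numbers illustrative): with candidate_pool_size = 10,
  # mc_now = 3 and mc_should = 5, the node being added bumps the target
  # to min(5 + 1, 10) = 6, and since 3 < 6 the node should promote
  # itself to master candidate.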


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
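
  # For example (names illustrative): for an OS whose supported_variants
  # is ["squeeze", "wheezy"], the name "debootstrap+squeeze" is accepted,
  # while "debootstrap" (no variant) or "debootstrap+lenny" (unknown
  # variant) raise OpPrereqError.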


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = [
    ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", _TBool),
    ("error_codes", _TBool),
    ("debug_simulate_errors", _TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {primary-node: list of instances}, mapping each
        peer primary node to the instances for which this node is
        secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)
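
    # For example (illustrative): with op.error_codes set, an instance
    # error is reported in the machine-parseable form
    #   ERROR:EINSTANCEDOWN:instance:instance1.example.com:<message>
    # while otherwise the same error reads
    #   ERROR: instance instance1.example.com: <message>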

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pvs
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate failovers"
                      " should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(remote_os,
                           lambda v: isinstance(v, list) and len(v) == 7))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
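
    # Each NV_OSLIST entry unpacked above is a 7-element list; one entry
    # could look like this (values illustrative):
    #
    #   ["debootstrap", "/srv/ganeti/os/debootstrap", True, "",
    #    ["default"], [["ARG", "description"]], [15]]
    #
    # i.e. name, path, status, diagnose message, variants, parameters
    # and supported OS API versions.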

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue

      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue

      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure makes
    the output be logged in the verify output and the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)
1977 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1978 node_verify_param = {
1979 constants.NV_FILELIST: file_names,
1980 constants.NV_NODELIST: [node.name for node in nodeinfo
1981 if not node.offline],
1982 constants.NV_HYPERVISOR: hypervisors,
1983 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1984 node.secondary_ip) for node in nodeinfo
1985 if not node.offline],
1986 constants.NV_INSTANCELIST: hypervisors,
1987 constants.NV_VERSION: None,
1988 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1989 constants.NV_NODESETUP: None,
1990 constants.NV_TIME: None,
1991 constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      }
1995 if vg_name is not None:
1996 node_verify_param[constants.NV_VGLIST] = None
1997 node_verify_param[constants.NV_LVLIST] = vg_name
1998 node_verify_param[constants.NV_PVLIST] = [vg_name]
1999 node_verify_param[constants.NV_DRBDLIST] = None
2001 # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name))
                      for node in nodeinfo)
2006 for instance in instancelist:
2007 inst_config = instanceinfo[instance]
2009 for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode
2016 inst_config.MapLVsByNode(node_vol_should)
2018 pnode = inst_config.primary_node
2019 node_image[pnode].pinst.append(instance)
2021 for snode in inst_config.secondary_nodes:
2022 nimg = node_image[snode]
2023 nimg.sinst.append(instance)
2024 if pnode not in nimg.sbp:
2025 nimg.sbp[pnode] = []
2026 nimg.sbp[pnode].append(instance)
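    # After this loop, nimg.sbp maps each primary node to the instances
    # that keep a secondary replica on this node, e.g. (illustrative
    # values only):
    #   node_image["node2"].sbp == {"node1": ["inst1", "inst2"]}
    # meaning node2 holds secondaries for two instances whose primary is
    # node1.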
2028 # At this point, we have the in-memory data structures complete,
2029 # except for the runtime information, which we'll gather next
    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping
    # the time before and after executing the request, we can at least have
    # a time window.
2035 nvinfo_starttime = time.time()
2036 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2037 self.cfg.GetClusterName())
2038 nvinfo_endtime = time.time()
2040 all_drbd_map = self.cfg.ComputeDRBDMap()
    feedback_fn("* Verifying node status")
    refos_img = None
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2068 msg = all_nvinfo[node].fail_msg
2069 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue
2074 nresult = all_nvinfo[node].payload
2076 nimg.call_ok = self._VerifyNode(node_i, nresult)
2077 self._VerifyNodeNetwork(node_i, nresult)
2078 self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
2081 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2082 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2084 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2085 self._UpdateNodeInstances(node_i, nresult, nimg)
2086 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2087 self._UpdateNodeOS(node_i, nresult, nimg)
2088 if not nimg.os_fail:
        if refos_img is None:
          refos_img = nimg
2091 self._VerifyNodeOS(node_i, nimg, refos_img)
2093 feedback_fn("* Verifying instance status")
2094 for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
2097 inst_config = instanceinfo[instance]
2098 self._VerifyInstance(instance, inst_config, node_image)
2099 inst_nodes_offline = []
2101 pnode = inst_config.primary_node
2102 pnode_img = node_image[pnode]
2103 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2104 self.ENODERPC, pnode, "instance %s, connection to"
2105 " primary node failed", instance)
2107 if pnode_img.offline:
2108 inst_nodes_offline.append(pnode)
      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary, so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
2115 if not inst_config.secondary_nodes:
2116 i_non_redundant.append(instance)
2117 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2118 instance, "instance has multiple secondary nodes: %s",
2119 utils.CommaJoin(inst_config.secondary_nodes),
2120 code=self.ETYPE_WARNING)
2122 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2123 i_non_a_balanced.append(instance)
2125 for snode in inst_config.secondary_nodes:
2126 s_img = node_image[snode]
2127 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2128 "instance %s, connection to secondary node failed", instance)
        if s_img.offline:
          inst_nodes_offline.append(snode)
2133 # warn that the instance lives on offline nodes
2134 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2135 "instance lives on offline node(s) %s",
2136 utils.CommaJoin(inst_nodes_offline))
2137 # ... or ghost nodes
2138 for node in inst_config.all_nodes:
2139 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2140 "instance lives on ghost node %s", node)
2142 feedback_fn("* Verifying orphan volumes")
2143 self._VerifyOrphanVolumes(node_vol_should, node_image)
2145 feedback_fn("* Verifying orphan instances")
2146 self._VerifyOrphanInstances(instancelist, node_image)
2148 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2149 feedback_fn("* Verifying N+1 Memory redundancy")
2150 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2152 feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))
2157 if i_non_a_balanced:
2158 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2159 % len(i_non_a_balanced))
    if n_offline:
      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
    if n_drained:
      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
2169 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2170 """Analyze the post-hooks' result
2172 This method analyses the hook result, handles it, and sends some
2173 nicely-formatted feedback back to the user.
2175 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2176 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2177 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
2179 @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
2186 if phase == constants.HOOKS_PHASE_POST:
2187 # Used to change hooks' output to proper indentation
2188 indent_re = re.compile('^', re.M)
2189 feedback_fn("* Hooks Results")
2190 assert hooks_results, "invalid result from hooks"
2192 for node_name in hooks_results:
2193 res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
2196 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2197 "Communication failure in hooks execution: %s", msg)
2198 if res.offline or msg:
2199 # No need to investigate payload if node is offline or gave an error.
          # manually override lu_result here, as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
2204 for script, hkr, output in res.payload:
2205 test = hkr == constants.HKR_FAIL
2206 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2207 "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result
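  # Worked example of the indentation fix above (illustrative only): with
  # indent_re matching the start of every line (re.M), a two-line hook
  # output such as "ok\nwarn" has spaces prepended to each line by
  # indent_re.sub() before being passed to feedback_fn, which keeps
  # per-script output visually nested under the "* Hooks Results" header.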
2216 class LUVerifyDisks(NoHooksLU):
2217 """Verifies the cluster disks status.
2223 def ExpandNames(self):
2224 self.needed_locks = {
2225 locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      }
2228 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2230 def Exec(self, feedback_fn):
2231 """Verify integrity of cluster disks.
2233 @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
2239 result = res_nodes, res_instances, res_missing = {}, [], {}
2241 vg_name = self.cfg.GetVGName()
2242 nodes = utils.NiceSort(self.cfg.GetNodeList())
2243 instances = [self.cfg.GetInstanceInfo(name)
2244 for name in self.cfg.GetInstanceList()]
    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
2253 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2254 for node, vol_list in inst_lvs.iteritems():
2255 for vol in vol_list:
2256 nv_dict[(node, vol)] = inst
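    # Example of the transformation above (hypothetical names): starting
    # from inst_lvs == {"node1": ["xenvg/lv1", "xenvg/lv2"]} for instance
    # "inst1", nv_dict ends up as
    #   {("node1", "xenvg/lv1"): inst1, ("node1", "xenvg/lv2"): inst1}
    # which makes the later per-node LV lookup a single dict access.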
2261 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
2274 lvs = node_res.payload
2275 for lv_name, (_, _, lv_online) in lvs.items():
2276 inst = nv_dict.pop((node, lv_name), None)
2277 if (not lv_online and inst is not None
2278 and inst.name not in res_instances):
2279 res_instances.append(inst.name)
    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
2283 for key, inst in nv_dict.iteritems():
2284 if inst.name not in res_missing:
2285 res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
2291 class LURepairDiskSizes(NoHooksLU):
2292 """Verifies the cluster disks sizes.
2295 _OP_REQP = [("instances", _TListOf(_TNEString))]
2298 def ExpandNames(self):
2299 if self.op.instances:
2300 self.wanted_names = []
2301 for name in self.op.instances:
2302 full_name = _ExpandInstanceName(self.cfg, name)
2303 self.wanted_names.append(full_name)
2304 self.needed_locks = {
2305 locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
2311 self.needed_locks = {
2312 locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
2315 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2317 def DeclareLocks(self, level):
2318 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2319 self._LockInstancesNodes(primary_only=True)
2321 def CheckPrereq(self):
2322 """Check prerequisites.
2324 This only checks the optional instance list against the existing names.
2327 if self.wanted_names is None:
2328 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2330 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2331 in self.wanted_names]
2333 def _EnsureChildSizes(self, disk):
2334 """Ensure children of the disk have the needed disk size.
2336 This is valid mainly for DRBD8 and fixes an issue where the
2337 children have smaller disk size.
2339 @param disk: an L{ganeti.objects.Disk} object
2342 if disk.dev_type == constants.LD_DRBD8:
2343 assert disk.children, "Empty children for DRBD8?"
2344 fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size
2351 # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
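  # Illustrative example (made-up sizes): for a DRBD8 disk of size 10240
  # whose data child has size 10200, the child is grown to 10240 and the
  # call returns True, so the caller knows the configuration was changed
  # and needs to be written back.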
2356 def Exec(self, feedback_fn):
2357 """Verify the size of cluster disks.
2360 # TODO: check child disks too
2361 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
2364 pnode = instance.primary_node
2365 if pnode not in per_node_disks:
2366 per_node_disks[pnode] = []
2367 for idx, disk in enumerate(instance.disks):
2368 per_node_disks[pnode].append((instance, idx, disk))
    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
2380 if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
2400 changed.append((instance.name, idx, size))
2401 if self._EnsureChildSizes(disk):
2402 self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
2407 class LURenameCluster(LogicalUnit):
2408 """Rename the cluster.
2411 HPATH = "cluster-rename"
2412 HTYPE = constants.HTYPE_CLUSTER
2413 _OP_REQP = [("name", _TNEString)]
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
2423 mn = self.cfg.GetMasterNode()
2424 all_nodes = self.cfg.GetNodeList()
2425 return env, [mn], all_nodes
2427 def CheckPrereq(self):
2428 """Verify that the passed name is a valid one.
2431 hostname = utils.GetHostInfo(self.op.name)
2433 new_name = hostname.name
2434 self.ip = new_ip = hostname.ip
2435 old_name = self.cfg.GetClusterName()
2436 old_ip = self.cfg.GetMasterIP()
2437 if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
2441 if new_ip != old_ip:
2442 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2443 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2444 " reachable on the network. Aborting." %
2445 new_ip, errors.ECODE_NOTUNIQUE)
2447 self.op.name = new_name
2449 def Exec(self, feedback_fn):
2450 """Rename the cluster.
    clustername = self.op.name
    ip = self.ip
2456 # shutdown the master IP
2457 master = self.cfg.GetMasterNode()
2458 result = self.rpc.call_node_stop_master(master, False)
2459 result.Raise("Could not disable the master role")
    try:
      cluster = self.cfg.GetClusterInfo()
2463 cluster.cluster_name = clustername
2464 cluster.master_ip = ip
2465 self.cfg.Update(cluster, feedback_fn)
2467 # update the known hosts file
2468 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2469 node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
2474 result = self.rpc.call_upload_file(node_list,
2475 constants.SSH_KNOWN_HOSTS_FILE)
2476 for to_node, to_result in result.iteritems():
2477 msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
2491 def _RecursiveCheckIfLVMBased(disk):
2492 """Check if the given disk or its children are lvm-based.
2494 @type disk: L{objects.Disk}
2495 @param disk: the disk to check
2497 @return: boolean indicating whether a LD_LV dev_type was found or not
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
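# A few illustrative cases: a plain LD_LV disk returns True directly; a
# DRBD8 disk whose children are logical volumes returns True via the
# recursion; a purely file-based disk (no LV anywhere in the tree)
# returns False.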
2507 class LUSetClusterParams(LogicalUnit):
2508 """Change the parameters of the cluster.
2511 HPATH = "cluster-modify"
2512 HTYPE = constants.HTYPE_CLUSTER
2514 ("hvparams", _TOr(_TDictOf(_TNEString, _TDict), _TNone)),
2515 ("os_hvp", _TOr(_TDictOf(_TNEString, _TDict), _TNone)),
2516 ("osparams", _TOr(_TDictOf(_TNEString, _TDict), _TNone)),
2517 ("enabled_hypervisors",
2518 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2521 ("candidate_pool_size", None),
2524 ("remove_uids", None),
  def CheckArguments(self):
    """Check parameters

    """
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2537 except (ValueError, TypeError), err:
2538 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2539 str(err), errors.ECODE_INVAL)
2540 if self.op.candidate_pool_size < 1:
      raise errors.OpPrereqError("At least one master candidate needed",
                                 errors.ECODE_INVAL)
2544 _CheckBooleanOpField(self.op, "maintain_node_health")
2546 if self.op.uid_pool:
2547 uidpool.CheckUidPool(self.op.uid_pool)
2549 if self.op.add_uids:
2550 uidpool.CheckUidPool(self.op.add_uids)
2552 if self.op.remove_uids:
2553 uidpool.CheckUidPool(self.op.remove_uids)
2555 def ExpandNames(self):
2556 # FIXME: in the future maybe other cluster params won't require checking on
2557 # all nodes to be modified.
2558 self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
2561 self.share_locks[locking.LEVEL_NODE] = 1
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
2571 mn = self.cfg.GetMasterNode()
2572 return env, [mn], [mn]
2574 def CheckPrereq(self):
2575 """Check prerequisites.
2577 This checks whether the given params don't conflict and
2578 if the given volume group is valid.
2581 if self.op.vg_name is not None and not self.op.vg_name:
2582 instances = self.cfg.GetAllInstancesInfo().values()
2583 for inst in instances:
2584 for disk in inst.disks:
2585 if _RecursiveCheckIfLVMBased(disk):
2586 raise errors.OpPrereqError("Cannot disable lvm storage while"
2587 " lvm-based instances exist",
2590 node_list = self.acquired_locks[locking.LEVEL_NODE]
    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2609 self.cluster = cluster = self.cfg.GetClusterInfo()
2610 # validate params changes
2611 if self.op.beparams:
2612 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2613 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2615 if self.op.nicparams:
2616 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2617 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2618 objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
2622 for instance in self.cfg.GetAllInstancesInfo().values():
2623 for nic_idx, nic in enumerate(instance.nics):
2624 params_copy = copy.deepcopy(nic.nicparams)
2625 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2627 # check parameter syntax
2629 objects.NIC.CheckParameterSyntax(params_filled)
2630 except errors.ConfigurationError, err:
2631 nic_errors.append("Instance %s, nic/%d: %s" %
2632 (instance.name, nic_idx, err))
2634 # if we're moving instances to routed, check that they have an ip
2635 target_mode = params_filled[constants.NIC_MODE]
2636 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2638 (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))
2643 # hypervisor list/parameters
2644 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2645 if self.op.hvparams:
2646 for hv_name, hv_dict in self.op.hvparams.items():
2647 if hv_name not in self.new_hvparams:
2648 self.new_hvparams[hv_name] = hv_dict
2650 self.new_hvparams[hv_name].update(hv_dict)
2652 # os hypervisor parameters
2653 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
2656 if os_name not in self.new_os_hvp:
2657 self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
2660 if hv_name not in self.new_os_hvp[os_name]:
2661 self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2666 self.new_osp = objects.FillDict(cluster.osparams, {})
2667 if self.op.osparams:
2668 for os_name, osp in self.op.osparams.items():
2669 if os_name not in self.new_osp:
2670 self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)
2675 if not self.new_osp[os_name]:
2676 # we removed all parameters
2677 del self.new_osp[os_name]
2679 # check the parameter validity (remote check)
2680 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2681 os_name, self.new_osp[os_name])
2683 # changes to the hypervisor list
2684 if self.op.enabled_hypervisors is not None:
2685 self.hv_list = self.op.enabled_hypervisors
2686 for hv in self.hv_list:
2687 # if the hypervisor doesn't already exist in the cluster
2688 # hvparams, we initialize it to empty, and then (in both
2689 # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2695 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
2699 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2700 # either the enabled list has changed, or the parameters have, validate
2701 for hv_name, hv_params in self.new_hvparams.items():
2702 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2703 (self.op.enabled_hypervisors and
2704 hv_name in self.op.enabled_hypervisors)):
2705 # either this is a new hypervisor, or its parameters have changed
2706 hv_class = hypervisor.GetHypervisor(hv_name)
2707 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2708 hv_class.CheckParameterSyntax(hv_params)
2709 _CheckHVParams(self, node_list, hv_name, hv_params)
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
2714 for os_name, os_hvp in self.new_os_hvp.items():
2715 for hv_name, hv_params in os_hvp.items():
2716 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2717 # we need to fill in the new os_hvp on top of the actual hv_p
2718 cluster_defaults = self.new_hvparams.get(hv_name, {})
2719 new_osp = objects.FillDict(cluster_defaults, hv_params)
2720 hv_class = hypervisor.GetHypervisor(hv_name)
2721 hv_class.CheckParameterSyntax(new_osp)
2722 _CheckHVParams(self, node_list, hv_name, new_osp)
2725 def Exec(self, feedback_fn):
2726 """Change the parameters of the cluster.
2729 if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
2738 if self.op.hvparams:
2739 self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
2742 if self.op.enabled_hypervisors is not None:
2743 self.cluster.hvparams = self.new_hvparams
2744 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2745 if self.op.beparams:
2746 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2747 if self.op.nicparams:
2748 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2749 if self.op.osparams:
2750 self.cluster.osparams = self.new_osp
2752 if self.op.candidate_pool_size is not None:
2753 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2754 # we need to update the pool size here, otherwise the save will fail
2755 _AdjustCandidatePool(self, [])
2757 if self.op.maintain_node_health is not None:
2758 self.cluster.maintain_node_health = self.op.maintain_node_health
2760 if self.op.add_uids is not None:
2761 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2763 if self.op.remove_uids is not None:
2764 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2766 if self.op.uid_pool is not None:
2767 self.cluster.uid_pool = self.op.uid_pool
2769 self.cfg.Update(self.cluster, feedback_fn)
2772 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2773 """Distribute additional files which are part of the cluster configuration.
2775 ConfigWriter takes care of distributing the config and ssconf files, but
2776 there are more files which should be distributed to all nodes. This function
2777 makes sure those are copied.
2779 @param lu: calling logical unit
2780 @param additional_nodes: list of nodes not in the config to distribute to
2783 # 1. Gather target nodes
2784 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2785 dist_nodes = lu.cfg.GetOnlineNodeList()
2786 if additional_nodes is not None:
2787 dist_nodes.extend(additional_nodes)
2788 if myself.name in dist_nodes:
2789 dist_nodes.remove(myself.name)
2791 # 2. Gather files to distribute
2792 dist_files = set([constants.ETC_HOSTS,
2793 constants.SSH_KNOWN_HOSTS_FILE,
2794 constants.RAPI_CERT_FILE,
2795 constants.RAPI_USERS_FILE,
2796 constants.CONFD_HMAC_KEY,
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
                    ])
2800 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2801 for hv_name in enabled_hypervisors:
2802 hv_class = hypervisor.GetHypervisor(hv_name)
2803 dist_files.update(hv_class.GetAncillaryFiles())
2805 # 3. Perform the files upload
2806 for fname in dist_files:
2807 if os.path.exists(fname):
2808 result = lu.rpc.call_upload_file(dist_nodes, fname)
2809 for to_node, to_result in result.items():
2810 msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)
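# Typical callers (both in this module): LURedistributeConfig invokes
# _RedistributeAncillaryFiles(self) after a config update, and LUAddNode
# uses the additional_nodes parameter to push the files to a node that is
# not yet part of the configuration:
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])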
2817 class LURedistributeConfig(NoHooksLU):
2818 """Force the redistribution of cluster configuration.
  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False
2826 def ExpandNames(self):
2827 self.needed_locks = {
2828 locking.LEVEL_NODE: locking.ALL_SET,
2830 self.share_locks[locking.LEVEL_NODE] = 1
2832 def Exec(self, feedback_fn):
2833 """Redistribute the configuration.
2836 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2837 _RedistributeAncillaryFiles(self)
2840 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2841 """Sleep and poll for an instance's disk to sync.
2844 if not instance.disks or disks is not None and not disks:
2847 disks = _ExpandCheckDisks(instance, disks)
2850 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
2865 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2866 msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue
2883 cumul_degraded = (cumul_degraded or
2884 (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
2887 if mstat.estimated_time is not None:
2888 rem_time = ("%s remaining (estimated)" %
2889 utils.FormatSeconds(mstat.estimated_time))
2890 max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
2893 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2894 (disks[i].iv_name, mstat.sync_percent, rem_time))
2896 # if we're done but degraded, let's do a few small retries, to
2897 # make sure we see a stable and not transient situation; therefore
2898 # we force restart of the loop
2899 if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2912 return not cumul_degraded
2915 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2916 """Check that mirrors are not degraded.
2918 The ldisk parameter, if True, will change the test from the
2919 is_degraded attribute (which represents overall non-ok status for
2920 the device(s)) to the ldisk (representing the local storage status).
  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
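# Usage sketch (hypothetical arguments): a replace-disks style flow would
# call
#   _CheckDiskConsistency(lu, dev, node, on_primary=False, ldisk=True)
# to require LDS_OKAY on the local storage of a secondary, while plain
# consistency checks leave ldisk at False and only test is_degraded.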
2949 class LUDiagnoseOS(NoHooksLU):
2950 """Logical unit for OS diagnose/query.
2954 ("output_fields", _TListOf(_TNEString)),
2955 ("names", _TListOf(_TNEString)),
2958 _FIELDS_STATIC = utils.FieldSet()
2959 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2960 "parameters", "api_versions")
2962 def CheckArguments(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)
2967 _CheckOutputFields(static=self._FIELDS_STATIC,
2968 dynamic=self._FIELDS_DYNAMIC,
2969 selected=self.op.output_fields)
2971 def ExpandNames(self):
2972 # Lock all nodes, in shared mode
2973 # Temporary removal of locks, should be reverted later
2974 # TODO: reintroduce locks when they are lighter-weight
2975 self.needed_locks = {}
2976 #self.share_locks[locking.LEVEL_NODE] = 1
2977 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  @staticmethod
  def _DiagnoseByOS(rlist):
2981 """Remaps a per-node return list into an a per-os per-node dictionary
2983 @param rlist: a map with node names as keys and OS objects as values
2986 @return: a dictionary with osnames as keys and as value another
2987 map, with nodes as keys and tuples of (path, status, diagnose,
2988 variants, parameters, api_versions) as values, eg::
2990 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2991 (/srv/..., False, "invalid api")],
2992 "node2": [(/srv/..., True, "", [], [])]}
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
2998 # level), so that nodes with a non-responding node daemon don't
2999 # make all OSes invalid
3000 good_nodes = [node_name for node_name in rlist
3001 if not rlist[node_name].fail_msg]
3002 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
3005 for (name, path, status, diagnose, variants,
3006 params, api_versions) in nr.payload:
3007 if name not in all_os:
3008 # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
3011 for nname in good_nodes:
3012 all_os[name][nname] = []
3013 # convert params from [name, help] to (name, help)
3014 params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))

    return all_os
3019 def Exec(self, feedback_fn):
3020 """Compute the list of OSes.
3023 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3024 node_data = self.rpc.call_os_diagnose(valid_nodes)
3025 pol = self._DiagnoseByOS(node_data)
    output = []
    for os_name, os_data in pol.items():
      row = []
      valid = True
      (variants, params, api_versions) = null_state = (set(), set(), set())
3032 for idx, osl in enumerate(os_data.values()):
3033 valid = bool(valid and osl and osl[0][1])
        if not valid:
          (variants, params, api_versions) = null_state
          break
3037 node_variants, node_params, node_api = osl[0][3:6]
3038 if idx == 0: # first entry
3039 variants = set(node_variants)
3040 params = set(node_params)
3041 api_versions = set(node_api)
3042 else: # keep consistency
3043 variants.intersection_update(node_variants)
3044 params.intersection_update(node_params)
3045 api_versions.intersection_update(node_api)
      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = list(variants)
        elif field == "parameters":
          val = list(params)
        elif field == "api_versions":
          val = list(api_versions)
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
3071 class LURemoveNode(LogicalUnit):
3072 """Logical unit for removing a node.
3075 HPATH = "node-remove"
3076 HTYPE = constants.HTYPE_NODE
3077 _OP_REQP = [("node_name", _TNEString)]
3079 def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
3083 node would then be impossible to remove.
3087 "OP_TARGET": self.op.node_name,
3088 "NODE_NAME": self.op.node_name,
3090 all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
3096 return env, all_nodes, all_nodes
3098 def CheckPrereq(self):
3099 """Check prerequisites.
    This checks:
     - the node exists in the configuration
3103 - it does not have primary or secondary instances
3104 - it's not the master
3106 Any errors are signaled by raising errors.OpPrereqError.
3109 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3110 node = self.cfg.GetNodeInfo(self.op.node_name)
3111 assert node is not None
3113 instance_list = self.cfg.GetInstanceList()
3115 masternode = self.cfg.GetMasterNode()
3116 if node.name == masternode:
3117 raise errors.OpPrereqError("Node is the master node,"
3118 " you need to failover first.",
3121 for instance_name in instance_list:
3122 instance = self.cfg.GetInstanceInfo(instance_name)
3123 if node.name in instance.all_nodes:
3124 raise errors.OpPrereqError("Instance %s is still running on the node,"
3125 " please remove first." % instance_name,
    self.op.node_name = node.name
    self.node = node
3130 def Exec(self, feedback_fn):
3131 """Removes the node from the cluster.
3135 logging.info("Stopping the node daemon and removing configs from node %s",
3138 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3140 # Promote nodes to master candidate as needed
3141 _AdjustCandidatePool(self, exceptions=[node.name])
3142 self.context.RemoveNode(node.name)
3144 # Run post hooks on the node before it's removed
3145 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3152 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3153 msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)
3158 # Remove node from our /etc/hosts
3159 if self.cfg.GetClusterInfo().modify_etc_hosts:
3160 # FIXME: this should be done via an rpc call to node daemon
3161 utils.RemoveHostFromEtcHosts(node.name)
3162 _RedistributeAncillaryFiles(self)
3165 class LUQueryNodes(NoHooksLU):
3166 """Logical unit for querying nodes.
3169 # pylint: disable-msg=W0142
3171 ("output_fields", _TListOf(_TNEString)),
3172 ("names", _TListOf(_TNEString)),
3173 ("use_locking", _TBool),
3177 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3178 "master_candidate", "offline", "drained"]
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )
3187 _FIELDS_STATIC = utils.FieldSet(*[
3188 "pinst_cnt", "sinst_cnt",
3189 "pinst_list", "sinst_list",
3190 "pip", "sip", "tags",
3192 "role"] + _SIMPLE_FIELDS
3195 def CheckArguments(self):
3196 _CheckOutputFields(static=self._FIELDS_STATIC,
3197 dynamic=self._FIELDS_DYNAMIC,
3198 selected=self.op.output_fields)
3200 def ExpandNames(self):
3201 self.needed_locks = {}
3202 self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET
3209 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3210 self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
3215 def Exec(self, feedback_fn):
3216 """Computes the list of nodes and their attributes.
3219 all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
3222 elif self.wanted != locking.ALL_SET:
3223 nodenames = self.wanted
3224 missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()
3231 nodenames = utils.NiceSort(nodenames)
3232 nodelist = [all_info[name] for name in nodenames]
3234 # begin data gathering
3236 if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3239 self.cfg.GetHypervisorType())
3240 for name in nodenames:
3241 nodeinfo = node_data[name]
3242 if not nodeinfo.fail_msg and nodeinfo.payload:
3243 nodeinfo = nodeinfo.payload
3244 fn = utils.TryConvert
3246 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3247 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3248 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3249 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3250 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3251 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3252 "bootid": nodeinfo.get('bootid', None),
3253 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3254 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3257 live_data[name] = {}
3259 live_data = dict.fromkeys(nodenames, {})
3261 node_to_primary = dict([(name, set()) for name in nodenames])
3262 node_to_secondary = dict([(name, set()) for name in nodenames])
3264 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3265 "sinst_cnt", "sinst_list"))
3266 if inst_fields & frozenset(self.op.output_fields):
3267 inst_data = self.cfg.GetAllInstancesInfo()
3269 for inst in inst_data.values():
3270 if inst.primary_node in node_to_primary:
3271 node_to_primary[inst.primary_node].add(inst.name)
3272 for secnode in inst.secondary_nodes:
3273 if secnode in node_to_secondary:
3274 node_to_secondary[secnode].add(inst.name)
3276 master_node = self.cfg.GetMasterNode()
3278 # end data gathering
    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
3284 if field in self._SIMPLE_FIELDS:
3285 val = getattr(node, field)
3286 elif field == "pinst_list":
3287 val = list(node_to_primary[node.name])
3288 elif field == "sinst_list":
3289 val = list(node_to_secondary[node.name])
3290 elif field == "pinst_cnt":
3291 val = len(node_to_primary[node.name])
3292 elif field == "sinst_cnt":
3293 val = len(node_to_secondary[node.name])
3294 elif field == "pip":
3295 val = node.primary_ip
3296 elif field == "sip":
3297 val = node.secondary_ip
3298 elif field == "tags":
3299 val = list(node.GetTags())
3300 elif field == "master":
3301 val = node.name == master_node
3302 elif self._FIELDS_DYNAMIC.Matches(field):
3303 val = live_data[node.name].get(field, None)
3304 elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
3317 node_output.append(val)
      output.append(node_output)

    return output
3323 class LUQueryNodeVolumes(NoHooksLU):
3324 """Logical unit for getting volumes on node(s).
3328 ("nodes", _TListOf(_TNEString)),
3329 ("output_fields", _TListOf(_TNEString)),
3332 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3333 _FIELDS_STATIC = utils.FieldSet("node")
3335 def CheckArguments(self):
3336 _CheckOutputFields(static=self._FIELDS_STATIC,
3337 dynamic=self._FIELDS_DYNAMIC,
3338 selected=self.op.output_fields)
3340 def ExpandNames(self):
3341 self.needed_locks = {}
3342 self.share_locks[locking.LEVEL_NODE] = 1
3343 if not self.op.nodes:
3344 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
3349 def Exec(self, feedback_fn):
3350 """Computes the list of nodes and their attributes.
3353 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3354 volumes = self.rpc.call_node_volumes(nodenames)
3356 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3357 in self.cfg.GetInstanceList()]
3359 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
3372 node_vols.sort(key=lambda vol: vol['dev'])
      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                break
            else:
              inst = None
            val = inst and inst.name
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))
        output.append(node_output)

    return output
3405 class LUQueryNodeStorage(NoHooksLU):
3406 """Logical unit for getting information on storage units on node(s).
3409 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3411 ("nodes", _TListOf(_TNEString)),
3412 ("storage_type", _CheckStorageType),
3413 ("output_fields", _TListOf(_TNEString)),
3415 _OP_DEFS = [("name", None)]
3418 def CheckArguments(self):
3419 _CheckOutputFields(static=self._FIELDS_STATIC,
3420 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3421 selected=self.op.output_fields)
3423 def ExpandNames(self):
3424 self.needed_locks = {}
3425 self.share_locks[locking.LEVEL_NODE] = 1
3428 self.needed_locks[locking.LEVEL_NODE] = \
3429 _GetWantedNodes(self, self.op.nodes)
3431 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3433 def Exec(self, feedback_fn):
3434 """Computes the list of nodes and their attributes.
3437 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3439 # Always get name to sort by
3440 if constants.SF_NAME in self.op.output_fields:
3441 fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields
3445 # Never ask for node or type as it's only known to the LU
3446 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3447 while extra in fields:
3448 fields.remove(extra)
3450 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3451 name_idx = field_idx[constants.SF_NAME]
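    # Illustration (made-up field list): with fields == ["name", "size"],
    # field_idx == {"name": 0, "size": 1} and name_idx == 0, so each
    # result row can be keyed and sorted by its storage unit name below.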
3453 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3454 data = self.rpc.call_storage_list(self.nodes,
3455 self.op.storage_type, st_args,
3456 self.op.name, fields)
    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue
3470 rows = dict([(row[name_idx], row) for row in nresult.payload])
      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)
          out.append(val)
        result.append(out)

    return result
3494 class LUModifyNodeStorage(NoHooksLU):
3495 """Logical unit for modifying a storage volume on a node.
3499 ("node_name", _TNEString),
3500 ("storage_type", _CheckStorageType),
3501 ("name", _TNEString),
3502 ("changes", _TDict),
3506 def CheckArguments(self):
3507 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3509 storage_type = self.op.storage_type
    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' cannot be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
3518 diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)
3525 def ExpandNames(self):
3526 self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }
3530 def Exec(self, feedback_fn):
3531 """Computes the list of nodes and their attributes.
3534 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3535 result = self.rpc.call_storage_modify(self.op.node_name,
3536 self.op.storage_type, st_args,
3537 self.op.name, self.op.changes)
3538 result.Raise("Failed to modify storage unit '%s' on %s" %
3539 (self.op.name, self.op.node_name))
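  # Illustrative example (the allowed fields depend on the storage
  # backend): for an lvm-pv storage unit,
  # constants.MODIFIABLE_STORAGE_FIELDS typically only permits the
  # "allocatable" flag, so a valid opcode would carry something like
  # changes={"allocatable": False} for the given PV name.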
3542 class LUAddNode(LogicalUnit):
3543 """Logical unit for adding node to the cluster.
3547 HTYPE = constants.HTYPE_NODE
3549 ("node_name", _TNEString),
3551 _OP_DEFS = [("secondary_ip", None)]
3553 def CheckArguments(self):
3554 # validate/normalize the node name
3555 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3557 def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.
3564 "OP_TARGET": self.op.node_name,
3565 "NODE_NAME": self.op.node_name,
3566 "NODE_PIP": self.op.primary_ip,
3567 "NODE_SIP": self.op.secondary_ip,
3569 nodes_0 = self.cfg.GetNodeList()
3570 nodes_1 = nodes_0 + [self.op.node_name, ]
3571 return env, nodes_0, nodes_1
3573 def CheckPrereq(self):
3574 """Check prerequisites.
    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg
3587 dns_data = utils.GetHostInfo(node_name)
3589 node = dns_data.name
3590 primary_ip = self.op.primary_ip = dns_data.ip
3591 if self.op.secondary_ip is None:
3592 self.op.secondary_ip = primary_ip
3593 if not utils.IsValidIP(self.op.secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
3596 secondary_ip = self.op.secondary_ip
3598 node_list = cfg.GetNodeList()
3599 if not self.op.readd and node in node_list:
3600 raise errors.OpPrereqError("Node %s is already in the configuration" %
3601 node, errors.ECODE_EXISTS)
3602 elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)
3606 self.changed_primary_ip = False
3608 for existing_node_name in node_list:
3609 existing_node = cfg.GetNodeInfo(existing_node_name)
3611 if self.op.readd and node == existing_node_name:
3612 if existing_node.secondary_ip != secondary_ip:
3613 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3614 " address configuration as before",
3616 if existing_node.primary_ip != primary_ip:
3617 self.changed_primary_ip = True
3621 if (existing_node.primary_ip == primary_ip or
3622 existing_node.secondary_ip == primary_ip or
3623 existing_node.primary_ip == secondary_ip or
3624 existing_node.secondary_ip == secondary_ip):
3625 raise errors.OpPrereqError("New node ip address(es) conflict with"
3626 " existing node %s" % existing_node.name,
3627 errors.ECODE_NOTUNIQUE)
3629 # check that the type of the node (single versus dual homed) is the
3630 # same as for the master
3631 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3632 master_singlehomed = myself.secondary_ip == myself.primary_ip
3633 newbie_singlehomed = secondary_ip == primary_ip
3634 if master_singlehomed != newbie_singlehomed:
3635 if master_singlehomed:
3636 raise errors.OpPrereqError("The master has no private ip but the"
3637 " new node has one",
3640 raise errors.OpPrereqError("The master has a private ip but the"
3641 " new node doesn't have one",
3644 # checks reachability
3645 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3646 raise errors.OpPrereqError("Node not reachable by ping",
3647 errors.ECODE_ENVIRON)
3649 if not newbie_singlehomed:
3650 # check reachability from my secondary ip to newbie's secondary ip
3651 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3652 source=myself.secondary_ip):
3653 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3654 " based ping to noded port",
3655 errors.ECODE_ENVIRON)
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
3669 primary_ip=primary_ip,
3670 secondary_ip=secondary_ip,
3671 master_candidate=self.master_candidate,
3672 offline=False, drained=False)
3674 def Exec(self, feedback_fn):
3675 """Adds the new node to the cluster.
3678 new_node = self.new_node
3679 node = new_node.name
3681 # for re-adds, reset the offline/drained/master-candidate flags;
3682 # we need to reset here, otherwise offline would prevent RPC calls
3683 # later in the procedure; this also means that if the re-add
3684 # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip
3693 # notify the user about any possible mc promotion
3694 if new_node.master_candidate:
3695 self.LogInfo("Node will be a master candidate")
3697 # check connectivity
3698 result = self.rpc.call_version([node])[node]
3699 result.Raise("Can't get version information from node %s" % node)
3700 if constants.PROTOCOL_VERSION == result.payload:
3701 logging.info("Communication to node %s fine, sw version %s match",
3702 node, result.payload)
3704 raise errors.OpExecError("Version mismatch master version %s,"
3705 " node version %s" %
3706 (constants.PROTOCOL_VERSION, result.payload))
3709 if self.cfg.GetClusterInfo().modify_ssh_setup:
3710 logging.info("Copy ssh key to node %s", node)
3711 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3713 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      keyarray = []
      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
3723 result.Raise("Cannot transfer ssh keys to the new node")
3725 # Add node to our /etc/hosts, and add key to known_hosts
3726 if self.cfg.GetClusterInfo().modify_etc_hosts:
3727 # FIXME: this should be done via an rpc call to node daemon
3728 utils.AddHostToEtcHosts(new_node.name)
3730 if new_node.secondary_ip != new_node.primary_ip:
3731 result = self.rpc.call_node_has_ip_address(new_node.name,
3732 new_node.secondary_ip)
3733 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3734 prereq=True, ecode=errors.ECODE_ENVIRON)
3735 if not result.payload:
3736 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3737 " you gave (%s). Please fix and re-run this"
3738 " command." % new_node.secondary_ip)
3740 node_verify_list = [self.cfg.GetMasterNode()]
3741 node_verify_param = {
3742 constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }
3746 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3747 self.cfg.GetClusterName())
3748 for verifier in node_verify_list:
3749 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3750 nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")
    if self.op.readd:
      _RedistributeAncillaryFiles(self)
3760 self.context.ReaddNode(new_node)
3761 # make sure we redistribute the config
3762 self.cfg.Update(new_node, feedback_fn)
3763 # and make sure the new node will not have old files around
3764 if not new_node.master_candidate:
3765 result = self.rpc.call_node_demote_from_mc(new_node.name)
3766 msg = result.fail_msg
3768 self.LogWarning("Node failed to demote itself from master"
3769 " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3772 self.context.AddNode(new_node, self.proc.GetECId())
3775 class LUSetNodeParams(LogicalUnit):
3776 """Modifies the parameters of a node.
3779 HPATH = "node-modify"
3780 HTYPE = constants.HTYPE_NODE
3781 _OP_REQP = [("node_name", _TNEString)]
3784 def CheckArguments(self):
3785 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3786 _CheckBooleanOpField(self.op, 'master_candidate')
3787 _CheckBooleanOpField(self.op, 'offline')
3788 _CheckBooleanOpField(self.op, 'drained')
3789 _CheckBooleanOpField(self.op, 'auto_promote')
3790 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3791 if all_mods.count(None) == 3:
3792 raise errors.OpPrereqError("Please pass at least one modification",
3794 if all_mods.count(True) > 1:
3795 raise errors.OpPrereqError("Can't set the node into more than one"
3796 " state at the same time",
3799 # Boolean value that tells us whether we're offlining or draining the node
3800 self.offline_or_drain = (self.op.offline == True or
3801 self.op.drained == True)
3802 self.deoffline_or_drain = (self.op.offline == False or
3803 self.op.drained == False)
3804 self.might_demote = (self.op.master_candidate == False or
3805 self.offline_or_drain)
3807 self.lock_all = self.op.auto_promote and self.might_demote
3810 def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3816 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
3823 "OP_TARGET": self.op.node_name,
3824 "MASTER_CANDIDATE": str(self.op.master_candidate),
3825 "OFFLINE": str(self.op.offline),
3826 "DRAINED": str(self.op.drained),
3828 nl = [self.cfg.GetMasterNode(),
3832 def CheckPrereq(self):
3833 """Check prerequisites.
3835 This only checks the instance list against the existing names.
3838 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3840 if (self.op.master_candidate is not None or
3841 self.op.drained is not None or
3842 self.op.offline is not None):
3843 # we can't change the master's node flags
3844 if self.op.node_name == self.cfg.GetMasterNode():
3845 raise errors.OpPrereqError("The master role can be changed"
3846 " only via masterfailover",
3850 if node.master_candidate and self.might_demote and not self.lock_all:
3851 assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
3854 (mc_remaining, mc_should, _) = \
3855 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3856 if mc_remaining < mc_should:
3857 raise errors.OpPrereqError("Not enough master candidates, please"
3858 " pass auto_promote to allow promotion",
3861 if (self.op.master_candidate == True and
3862 ((node.offline and not self.op.offline == False) or
3863 (node.drained and not self.op.drained == False))):
3864 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3865 " to master_candidate" % node.name,
3868 # If we're being deofflined/drained, we'll MC ourself if needed
3869 if (self.deoffline_or_drain and not self.offline_or_drain and not
3870 self.op.master_candidate == True and not node.master_candidate):
3871 self.op.master_candidate = _DecideSelfPromotion(self)
3872 if self.op.master_candidate:
3873 self.LogInfo("Autopromoting node to master candidate")
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    result = []
3886 if self.op.offline is not None:
3887 node.offline = self.op.offline
3888 result.append(("offline", str(self.op.offline)))
3889 if self.op.offline == True:
3890 if node.master_candidate:
3891 node.master_candidate = False
3893 result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
3896 result.append(("drained", "clear drained status due to offline"))
3898 if self.op.master_candidate is not None:
3899 node.master_candidate = self.op.master_candidate
3901 result.append(("master_candidate", str(self.op.master_candidate)))
3902 if self.op.master_candidate == False:
3903 rrc = self.rpc.call_node_demote_from_mc(node.name)
3906 self.LogWarning("Node failed to demote itself: %s" % msg)
3908 if self.op.drained is not None:
3909 node.drained = self.op.drained
3910 result.append(("drained", str(self.op.drained)))
3911 if self.op.drained == True:
3912 if node.master_candidate:
3913 node.master_candidate = False
3915 result.append(("master_candidate", "auto-demotion due to drain"))
3916 rrc = self.rpc.call_node_demote_from_mc(node.name)
3919 self.LogWarning("Node failed to demote itself: %s" % msg)
3921 node.offline = False
3922 result.append(("offline", "clear offline status due to drain"))
3924 # we locked all nodes, we adjust the CP before updating this node
3926 _AdjustCandidatePool(self, [node.name])
3928 # this will trigger configuration file update, if needed
3929 self.cfg.Update(node, feedback_fn)
3931 # this will trigger job queue propagation or cleanup
3933 self.context.ReaddNode(node)
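

# Illustrative note (not part of the original module): Exec above returns a
# list of (attribute, new value) pairs; offlining a node that was a master
# candidate might yield, e.g. (hypothetical output):
#
#   [("offline", "True"),
#    ("master_candidate", "auto-demotion due to offline")]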


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = [
    ("node_name", _TNEString),
    ("force", _TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
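

# Illustrative note (not part of the original module): powercycling the
# master requires the force flag, since the node would powercycle itself;
# assuming an OpPowercycleNode-style opcode, a hypothetical call could be:
#
#   opcodes.OpPowercycleNode(node_name="node2.example.com", force=False)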


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = [("output_fields", _TListOf(_TNEString))]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
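

# Illustrative note (not part of the original module): LUQueryConfigValues
# returns the values in the order the fields were requested, e.g.
# (hypothetical values):
#
#   output_fields=["cluster_name", "drain_flag"]
#   -> ["cluster1.example.com", False]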


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = [("instance_name", _TNEString)]
  _OP_DEFS = [("ignore_size", False)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
        _AssembleInstanceDisks(self, self.instance,
                               ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
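

# Illustrative note (not part of the original module): for a healthy DRBD
# instance with a single disk, _AssembleInstanceDisks might return
# (hypothetical values):
#
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
#
# i.e. the success flag plus one (node, iv_name, device path) triple per
# disk on the primary node.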


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = [("instance_name", _TNEString)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored (they make the function return False); if it is true,
  primary-node errors are only logged.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
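

# Illustrative note (not part of the original module): a typical caller
# checks the primary node before starting an instance, as LUStartupInstance
# does further below:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)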


def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("force", _TBool),
    ("beparams", _TDict),
    ("hvparams", _TDict),
    ]
  _OP_DEFS = [
    ("beparams", _EmptyDict),
    ("hvparams", _EmptyDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hypervisor parameters
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
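

# Illustrative note (not part of the original module): the hvparams and
# beparams accepted by LUStartupInstance are one-shot overrides for this
# start only; e.g. starting with extra memory might look like (hypothetical
# values, assuming an OpStartupInstance-style opcode):
#
#   opcodes.OpStartupInstance(instance_name="inst1.example.com",
#                             force=False,
#                             beparams={constants.BE_MEMORY: 2048})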


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("ignore_secondaries", _TBool),
    ("reboot_type", _TElemOf(constants.REBOOT_TYPES)),
    ]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
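

# Illustrative note (not part of the original module): soft and hard reboots
# are delegated to the hypervisor via call_instance_reboot, while a full
# reboot is emulated above as shutdown + disk restart + start; e.g.
# (hypothetical values, assuming an OpRebootInstance-style opcode):
#
#   opcodes.OpRebootInstance(instance_name="inst1.example.com",
#                            reboot_type=constants.INSTANCE_REBOOT_FULL,
#                            ignore_secondaries=False)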


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [("instance_name", _TNEString)]
  _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [("instance_name", _TNEString)]
  _OP_DEFS = [
    ("os_type", None),
    ("force_variant", False),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("disks", _TListOf(_TPInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)

    _CreateDisks(self, self.instance, to_skip=to_skip)
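

# Illustrative note (not part of the original module): the "disks" argument
# lists disk indexes to recreate, an empty list meaning all of them; e.g.
# recreating only the second disk of an instance (hypothetical values,
# assuming an OpRecreateInstanceDisks-style opcode):
#
#   opcodes.OpRecreateInstanceDisks(instance_name="inst1.example.com",
#                                   disks=[1])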


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("new_name", _TNEString),
    ]
  _OP_DEFS = [("ignore_ip", False)]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not self.op.ignore_ip:
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("ignore_failures", _TBool),
    ]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = [
    ("output_fields", _TListOf(_TNEString)),
    ("names", _TListOf(_TNEString)),
    ("use_locking", _TBool),
    ]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
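
  # Illustrative note (not part of the original module): the regular
  # expressions in _FIELDS_STATIC above match parametrized fields; e.g.
  # "disk.size/0" matches r"(disk)\.(size)/([0-9]+)" and yields the groups
  # ("disk", "size", "0"), which Exec below dispatches on.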

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based items
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
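

# Illustrative note (not part of the original module): the result is one row
# per instance, with columns in requested order; for
# output_fields=["name", "status", "oper_ram"] the output might be
# (hypothetical values):
#
#   [["inst1.example.com", "running", 512],
#    ["inst2.example.com", "ADMIN_down", "-"]]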


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("ignore_consistency", _TBool),
    ]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("live", _TBool),
    ("cleanup", _TBool),
    ]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("target_node", _TNEString),
    ]
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 ",".join(errs))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
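

# Illustrative note (not part of the original module): as CheckPrereq above
# enforces, a move by data copy only works for plain LVM or file-based disks
# (constants.LD_LV / constants.LD_FILE); network-mirrored (DRBD) instances
# use failover or migration instead.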


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = [
    ("node_name", _TNEString),
    ("live", _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()
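
    # The disks are now connected in primary/primary ("dual-master") mode,
    # which is what allows the instance to be live on either node during
    # the handover; single-master mode is restored at the end of the
    # migration, once the instance runs only on the target node.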
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
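
# Note that _CreateBlockDev recurses into the children before creating
# the device itself, so device trees are built bottom-up: for a DRBD8
# disk, the data and meta LVs exist before the DRBD device is created
# on top of them.
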
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))

  return results
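
# For illustration: _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
# returns names of the form "<uuid>.disk0_data" and "<uuid>.disk0_meta",
# with a fresh unique ID generated for each extension.
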
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
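
# The resulting device tree is a DRBD8 device sitting on top of a
# full-size data LV and a fixed 128 MB metadata LV; its logical_id
# carries both node names, the allocated port, both minors and the
# shared secret used by the two drbd endpoints.
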
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)
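    # AllocateDRBDMinor is given the node list [pnode, snode, pnode,
    # snode, ...], so the returned minors are interleaved per disk:
    # minors[2 * idx] is the primary's minor for disk idx and
    # minors[2 * idx + 1] the secondary's, matching the slicing below.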
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
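
# Note: unlike _RemoveDisks below, disk creation is not best-effort; the
# first failing RPC raises via result.Raise() and the caller is expected
# to clean up any devices that were already created.
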
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
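
# Worked example: two 1024 MB disks under DT_DRBD8 require
# 2 * (1024 + 128) = 2304 MB of free space in the volume group, while
# the file and diskless templates have no VG requirement (None).
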
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("mode", _TElemOf(constants.INSTANCE_CREATE_MODES)),
    ("start", _TBool),
    ("wait_for_sync", _TBool),
    ("ip_check", _TBool),
    ("disks", _TListOf(_TDict)),
    ("nics", _TListOf(_TDict)),
    ("hvparams", _TDict),
    ("beparams", _TDict),
    ("osparams", _TDict),
    ]
  _OP_DEFS = [
    ("name_check", True),
    ("no_install", False),
    ("os_type", None),
    ("force_variant", False),
    ("source_handshake", None),
    ("source_x509_ca", None),
    ("source_instance_name", None),
    ("src_node", None),
    ("src_path", None),
    ("pnode", None),
    ("snode", None),
    ("iallocator", None),
    ("hypervisor", None),
    ("disk_template", None),
    ("identify_defaults", None),
    ("file_driver", None),
    ("file_storage_dir", None),
    ]
  REQ_BGL = False
  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template != constants.DT_PLAIN:
        raise errors.OpPrereqError("Disk adoption is only supported for the"
                                   " 'plain' disk template",
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      raise errors.OpPrereqError("Remote imports require names to be checked",
                                 errors.ECODE_INVAL)
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path must not be"
                                 " absolute", errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]

    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]

    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                                    ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
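
    # Note: each NIC's nicparams stores only the explicitly requested
    # values; cluster.SimpleFillNIC is used above only to validate the
    # result of layering them over the cluster defaults.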
    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_INVAL)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise
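
    # Note: the instance was not added to the configuration yet when the
    # disks were created, so on failure it was enough to remove the disks
    # and give back the reserved DRBD minors before re-raising.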
    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))
      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = [("instance_name", _TNEString)]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
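
# The LU only builds the command; the caller is expected to exec the
# returned SSH command line (as root, with a tty) on the master node to
# actually attach to the hypervisor-provided console.
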
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("mode", _TElemOf(constants.REPLACE_MODES)),
    ("disks", _TListOf(_TPInt)),
    ]
  _OP_DEFS = [
    ("remote_node", None),
    ("iallocator", None),
    ("early_release", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

    # If not specified all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))
  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names
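
  # Illustration of the mapping returned above (hypothetical names): for a
  # replaced disk 0, iv_names would contain an entry such as
  #   "disk/0": (<drbd disk>, [old_data_lv, old_meta_lv],
  #              [new_data_lv, new_meta_lv])
  # i.e. the DRBD device together with its old and new local LV children.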

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock(s) for a given node or list of nodes."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
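
  # Sketch of the rename dance performed above, with made-up names: if the
  # old data LV is ("xenvg", "uuid.disk0_data") and time.time() is
  # 1300000000, the two rename passes produce
  #   uuid.disk0_data         -> uuid.disk0_data_replaced-1300000000
  #   <new unique>.disk0_data -> uuid.disk0_data
  # so the freshly-created LV ends up under the old name and can be
  # re-attached to the DRBD device transparently.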

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                               [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
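
  # Illustration of the logical_id rewrite above (hypothetical values): a
  # disk with logical_id
  #   ("node1", "node2", 11000, 0, 1, "secret")
  # whose secondary moves to "node3" with newly allocated minor 3 becomes
  #   ("node1", "node3", 11000, 0, 3, "secret")
  # i.e. only the secondary node and its DRBD minor change.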


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = [("node_name", _TNEString)]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
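

# A repair of this kind is normally requested from the command line, e.g.
# (hypothetical node and volume group names):
#   gnt-node repair-storage node1.example.com lvm-vg xenvg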


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = [("nodes", _TListOf(_TNEString))]
  _OP_DEFS = [
    ("remote_node", None),
    ("iallocator", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result
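

# The strategy computed above is a list of [instance_name, new_node] pairs;
# with hypothetical names, evacuating one node might yield e.g.
#   [["inst1.example.com", "node3.example.com"],
#    ["inst2.example.com", "node4.example.com"]]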


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("disk", _TInt),
    ("amount", _TInt),
    ("wait_for_sync", _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = [
    ("instances", _TListOf(_TNEString)),
    ("static", _TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result
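

# In outline, the per-instance dictionary built above looks like this
# (hypothetical values, many keys omitted):
#   {"name": "inst1.example.com", "config_state": "up", "run_state": "up",
#    "pnode": "node1.example.com", "snodes": ["node2.example.com"], ...}
# and the overall result maps each instance name to such a dictionary.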


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [("instance_name", _TNEString)]
  _OP_DEFS = [
    ("nics", _EmptyList),
    ("disks", _EmptyList),
    ("beparams", _EmptyDict),
    ("hvparams", _EmptyDict),
    ("disk_template", None),
    ("remote_node", None),
    ("os_name", None),
    ("force_variant", False),
    ("osparams", None),
    ("force", False),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        _CheckInstanceDown(self, instance, "cannot change disk template")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_default=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_new = self.os_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
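

# _DISK_CONVERSIONS is keyed by (old_template, new_template) pairs, so Exec
# simply looks up e.g. (constants.DT_PLAIN, constants.DT_DRBD8) and calls
# the matching handler; supporting a new conversion only requires adding an
# entry to this dictionary.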


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = [("nodes", _TListOf(_TNEString))]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("mode", _TElemOf(constants.EXPORT_MODES)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [
    ("instance_name", _TNEString),
    ("target_node", _TNEString),
    ("shutdown", _TBool),
    ("mode", _TElemOf(constants.EXPORT_MODES)),
    ]
  _OP_DEFS = [
    ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
    ("remove_instance", False),
    ("ignore_remove_failures", False),
    ("mode", constants.EXPORT_MODE_LOCAL),
    ("x509_key_name", None),
    ("destination_x509_ca", None),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.remove_instance and not self.op.shutdown:
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down first", errors.ECODE_INVAL)

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Remove instance if requested
    if self.op.remove_instance:
      if not (compat.all(dresults) and fin_resu):
        feedback_fn("Not removing instance %s as parts of the export failed" %
                    instance.name)
      else:
        feedback_fn("Removing instance %s" % instance.name)
        _RemoveInstance(self, feedback_fn, instance,
                        self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
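

# The (fin_resu, dresults) pair returned above combines an overall success
# flag with one boolean per disk; a fully successful export of a two-disk
# instance would return e.g. (True, [True, True]) (illustrative values).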
9453 class LURemoveExport(NoHooksLU):
9454 """Remove exports related to the named instance.
9457 _OP_REQP = [("instance_name", _TNEString)]
9460 def ExpandNames(self):
9461 self.needed_locks = {}
9462 # We need all nodes to be locked in order for RemoveExport to work, but we
9463 # don't need to lock the instance itself, as nothing will happen to it (and
9464 # we can remove exports also for a removed instance)
9465 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):  # pylint: disable-msg=W0223
  """Tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = [
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _TNEString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = [("pattern", _TNEString)]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = [
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _TNEString),
    ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
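
# Illustrative usage sketch (assumption: tags are added by submitting the
# corresponding opcode; the opcode name and field values below are examples
# only, not taken from this module):
#   op = opcodes.OpAddTags(kind=constants.TAG_INSTANCE,
#                          name="inst1.example.com",
#                          tags=["env:prod"])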


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = [
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _TNEString),
    ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = [
    ("duration", _TFloat),
    ("on_master", _TBool),
    ("on_nodes", _TListOf(_TNEString)),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # TODO: convert to the type system
    self.op.repeat = getattr(self.op, "repeat", 0)
    if self.op.repeat < 0:
      raise errors.OpPrereqError("Repetition count cannot be negative",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
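
# Illustrative usage (assumption: exercised via the debug CLI, which submits
# the corresponding opcode; exact flags may differ by version):
#   gnt-debug delay -n node1.example.com 5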


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
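
  # Illustrative construction sketch (assumption: invoked from an LU, like
  # the callers later in this module; the allocator name "hail" and all
  # values are examples only):
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("hail")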

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
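
  # Illustrative serialized request fragment (values made up):
  #   {"request": {"type": "allocate", "name": "inst1.example.com",
  #                "required_nodes": 2, ...}, "nodes": {...}, ...}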

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
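
  # Illustrative well-formed allocator reply (values made up):
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}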


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = [
    ("direction", _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _TNEString),
    ("nics", _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]), _TNEString)))),
    ("disks", _TOr(_TNone, _TList)),
    ]
  _OP_DEFS = [
    ("hypervisor", None),
    ("allocator", None),
    ("nics", None),
    ("disks", None),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
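
# Illustrative usage (assumption: exercised via the debug CLI, which builds
# the corresponding test opcode; exact flags may differ by version):
#   gnt-debug allocator --dir in --mode relocate inst1.example.com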