4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module
40 from ganeti import ssh
41 from ganeti import utils
42 from ganeti import errors
43 from ganeti import hypervisor
44 from ganeti import locking
45 from ganeti import constants
46 from ganeti import objects
47 from ganeti import serializer
48 from ganeti import ssconf
49 from ganeti import uidpool
50 from ganeti import compat
51 from ganeti import masterd
53 import ganeti.masterd.instance # pylint: disable-msg=W0611
# Modifiable default values; need to define these here before the
# actual LUs that use them, so that they are available at class-definition time
60 """Returns an empty list.
67 """Returns an empty dict.
75 """Checks if the given value is not None.
78 return val is not None
82 """Checks if the given value is None.
89 """Checks if the given value is a boolean.
92 return isinstance(val, bool)
96 """Checks if the given value is an integer.
99 return isinstance(val, int)
103 """Checks if the given value is a float.
106 return isinstance(val, float)
def _TString(val):
  """Checks if the given value is a string.

  """
  # basestring covers both str and unicode (this module is Python 2)
  return isinstance(val, basestring)
117 """Checks if a given value evaluates to a boolean True value.
123 def _TElemOf(target_list):
124 """Builds a function that checks if a given value is a member of a list.
127 return lambda val: val in target_list
132 """Checks if the given value is a list.
135 return isinstance(val, list)
139 """Checks if the given value is a dictionary.
142 return isinstance(val, dict)
def _TAnd(*args):
  """Combine multiple functions using an AND operation.

  """
  def fn(val):
    return compat.all(t(val) for t in args)
  return fn
def _TOr(*args):
  """Combine multiple functions using an OR operation.

  """
  # NOTE: previous docstring said "AND operation" (copy-paste from _TAnd);
  # the combinator below is clearly a logical OR over all given checks
  def fn(val):
    return compat.any(t(val) for t in args)
  return fn
167 _TNonEmptyString = _TAnd(_TString, _TTrue)
171 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
def _TListOf(my_type):
  """Checks if a given value is a list with all elements of the same type.

  """
  return _TAnd(_TList,
               lambda lst: compat.all(lst, my_type))
def _TDictOf(key_type, val_type):
  """Checks a dict type for the type of its key/values.

  """
  return _TAnd(_TDict,
               lambda my_dict: (compat.all(my_dict.keys(), key_type) and
                                compat.all(my_dict.values(), val_type)))
192 class LogicalUnit(object):
193 """Logical Unit base class.
195 Subclasses must follow these rules:
196 - implement ExpandNames
197 - implement CheckPrereq (except when tasklets are used)
198 - implement Exec (except when tasklets are used)
199 - implement BuildHooksEnv
200 - redefine HPATH and HTYPE
201 - optionally redefine their run requirements:
202 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
204 Note that all commands require root permissions.
206 @ivar dry_run_result: the value (if any) that will be returned to the caller
207 in dry-run mode (signalled by opcode dry_run parameter)
208 @cvar _OP_DEFS: a list of opcode attributes and the defaults values
209 they should get if not already existing
218 def __init__(self, processor, op, context, rpc):
219 """Constructor for LogicalUnit.
221 This needs to be overridden in derived classes in order to check op
225 self.proc = processor
227 self.cfg = context.cfg
228 self.context = context
230 # Dicts used to declare locking needs to mcpu
231 self.needed_locks = None
232 self.acquired_locks = {}
233 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
235 self.remove_locks = {}
236 # Used to force good behavior when calling helper functions
237 self.recalculate_locks = {}
240 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
241 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
242 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
243 # support for dry-run
244 self.dry_run_result = None
245 # support for generic debug attribute
246 if (not hasattr(self.op, "debug_level") or
247 not isinstance(self.op.debug_level, int)):
248 self.op.debug_level = 0
253 for aname, aval in self._OP_DEFS:
254 if not hasattr(self.op, aname):
259 setattr(self.op, aname, dval)
261 for attr_name, test in self._OP_REQP:
262 if not hasattr(op, attr_name):
263 raise errors.OpPrereqError("Required parameter '%s' missing" %
264 attr_name, errors.ECODE_INVAL)
265 attr_val = getattr(op, attr_name, None)
266 if not callable(test):
267 raise errors.ProgrammerError("Validation for parameter '%s' failed,"
268 " given type is not a proper type (%s)" %
270 if not test(attr_val):
271 raise errors.OpPrereqError("Parameter '%s' has invalid type" %
272 attr_name, errors.ECODE_INVAL)
274 self.CheckArguments()
277 """Returns the SshRunner object
281 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
284 ssh = property(fget=__GetSSH)
286 def CheckArguments(self):
287 """Check syntactic validity for the opcode arguments.
289 This method is for doing a simple syntactic check and ensure
290 validity of opcode parameters, without any cluster-related
291 checks. While the same can be accomplished in ExpandNames and/or
292 CheckPrereq, doing these separate is better because:
294 - ExpandNames is left as as purely a lock-related function
295 - CheckPrereq is run after we have acquired locks (and possible
298 The function is allowed to change the self.op attribute so that
299 later methods can no longer worry about missing parameters.
304 def ExpandNames(self):
305 """Expand names for this LU.
307 This method is called before starting to execute the opcode, and it should
308 update all the parameters of the opcode to their canonical form (e.g. a
309 short node name must be fully expanded after this method has successfully
310 completed). This way locking, hooks, logging, ecc. can work correctly.
312 LUs which implement this method must also populate the self.needed_locks
313 member, as a dict with lock levels as keys, and a list of needed lock names
316 - use an empty dict if you don't need any lock
317 - if you don't need any lock at a particular level omit that level
318 - don't put anything for the BGL level
319 - if you want all locks at a level use locking.ALL_SET as a value
321 If you need to share locks (rather than acquire them exclusively) at one
322 level you can modify self.share_locks, setting a true value (usually 1) for
323 that level. By default locks are not shared.
325 This function can also define a list of tasklets, which then will be
326 executed in order instead of the usual LU-level CheckPrereq and Exec
327 functions, if those are not defined by the LU.
331 # Acquire all nodes and one instance
332 self.needed_locks = {
333 locking.LEVEL_NODE: locking.ALL_SET,
334 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
336 # Acquire just two nodes
337 self.needed_locks = {
338 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
341 self.needed_locks = {} # No, you can't leave it to the default value None
344 # The implementation of this method is mandatory only if the new LU is
345 # concurrent, so that old LUs don't need to be changed all at the same
348 self.needed_locks = {} # Exclusive LUs don't need locks.
350 raise NotImplementedError
352 def DeclareLocks(self, level):
353 """Declare LU locking needs for a level
355 While most LUs can just declare their locking needs at ExpandNames time,
356 sometimes there's the need to calculate some locks after having acquired
357 the ones before. This function is called just before acquiring locks at a
358 particular level, but after acquiring the ones at lower levels, and permits
359 such calculations. It can be used to modify self.needed_locks, and by
360 default it does nothing.
362 This function is only called if you have something already set in
363 self.needed_locks for the level.
365 @param level: Locking level which is going to be locked
366 @type level: member of ganeti.locking.LEVELS
370 def CheckPrereq(self):
371 """Check prerequisites for this LU.
373 This method should check that the prerequisites for the execution
374 of this LU are fulfilled. It can do internode communication, but
375 it should be idempotent - no cluster or system changes are
378 The method should raise errors.OpPrereqError in case something is
379 not fulfilled. Its return value is ignored.
381 This method should also update all the parameters of the opcode to
382 their canonical form if it hasn't been done by ExpandNames before.
385 if self.tasklets is not None:
386 for (idx, tl) in enumerate(self.tasklets):
387 logging.debug("Checking prerequisites for tasklet %s/%s",
388 idx + 1, len(self.tasklets))
393 def Exec(self, feedback_fn):
396 This method should implement the actual work. It should raise
397 errors.OpExecError for failures that are somewhat dealt with in
401 if self.tasklets is not None:
402 for (idx, tl) in enumerate(self.tasklets):
403 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
406 raise NotImplementedError
408 def BuildHooksEnv(self):
409 """Build hooks environment for this LU.
411 This method should return a three-node tuple consisting of: a dict
412 containing the environment that will be used for running the
413 specific hook for this LU, a list of node names on which the hook
414 should run before the execution, and a list of node names on which
415 the hook should run after the execution.
417 The keys of the dict must not have 'GANETI_' prefixed as this will
418 be handled in the hooks runner. Also note additional keys will be
419 added by the hooks runner. If the LU doesn't define any
420 environment, an empty dict (and not None) should be returned.
422 No nodes should be returned as an empty list (and not None).
424 Note that if the HPATH for a LU class is None, this function will
428 raise NotImplementedError
430 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
431 """Notify the LU about the results of its hooks.
433 This method is called every time a hooks phase is executed, and notifies
434 the Logical Unit about the hooks' result. The LU can then use it to alter
435 its result based on the hooks. By default the method does nothing and the
436 previous result is passed back unchanged but any LU can define it if it
437 wants to use the local cluster hook-scripts somehow.
439 @param phase: one of L{constants.HOOKS_PHASE_POST} or
440 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
441 @param hook_results: the results of the multi-node hooks rpc call
442 @param feedback_fn: function used send feedback back to the caller
443 @param lu_result: the previous Exec result this LU had, or None
445 @return: the new Exec result, based on the previous result
449 # API must be kept, thus we ignore the unused argument and could
450 # be a function warnings
451 # pylint: disable-msg=W0613,R0201
454 def _ExpandAndLockInstance(self):
455 """Helper function to expand and lock an instance.
457 Many LUs that work on an instance take its name in self.op.instance_name
458 and need to expand it and then declare the expanded name for locking. This
459 function does it, and then updates self.op.instance_name to the expanded
460 name. It also initializes needed_locks as a dict, if this hasn't been done
464 if self.needed_locks is None:
465 self.needed_locks = {}
467 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
468 "_ExpandAndLockInstance called with instance-level locks set"
469 self.op.instance_name = _ExpandInstanceName(self.cfg,
470 self.op.instance_name)
471 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
473 def _LockInstancesNodes(self, primary_only=False):
474 """Helper function to declare instances' nodes for locking.
476 This function should be called after locking one or more instances to lock
477 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
478 with all primary or secondary nodes for instances already locked and
479 present in self.needed_locks[locking.LEVEL_INSTANCE].
481 It should be called from DeclareLocks, and for safety only works if
482 self.recalculate_locks[locking.LEVEL_NODE] is set.
484 In the future it may grow parameters to just lock some instance's nodes, or
485 to just lock primaries or secondary nodes, if needed.
487 If should be called in DeclareLocks in a way similar to::
489 if level == locking.LEVEL_NODE:
490 self._LockInstancesNodes()
492 @type primary_only: boolean
493 @param primary_only: only lock primary nodes of locked instances
496 assert locking.LEVEL_NODE in self.recalculate_locks, \
497 "_LockInstancesNodes helper function called with no nodes to recalculate"
499 # TODO: check if we're really been called with the instance locks held
501 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
502 # future we might want to have different behaviors depending on the value
503 # of self.recalculate_locks[locking.LEVEL_NODE]
505 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
506 instance = self.context.cfg.GetInstanceInfo(instance_name)
507 wanted_nodes.append(instance.primary_node)
509 wanted_nodes.extend(instance.secondary_nodes)
511 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
512 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
513 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
514 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
516 del self.recalculate_locks[locking.LEVEL_NODE]
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklets.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    # None/empty means "all instances", in a stable order
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  # NOTE: the @type/@param epydoc tags for use_default/use_none were
  # previously swapped; fixed above.
  # Deep copy so callers' nested structures are never mutated
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      # "reset to default" / "delete" marker: drop the key if present
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
676 def _CheckBooleanOpField(op, name):
677 """Validates boolean opcode parameters.
679 This will ensure that an opcode parameter is either a boolean value,
680 or None (but that it always exists).
683 val = getattr(op, name, None)
684 if not (val is None or isinstance(val, bool)):
685 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
686 (name, str(val)), errors.ECODE_INVAL)
687 setattr(op, name, val)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)
def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  # File-based templates additionally require file storage to be enabled
  if template == constants.DT_FILE:
    _RequireFileStorage()
def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  # File-based storage additionally requires file storage to be enabled
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  # strict=True: fail if the file is missing/empty rather than return None
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running.

  Checks both the configuration (admin_up flag) and the live state on
  the primary node, raising errors.OpPrereqError in either case.

  """
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  # Also query the primary node for the actual runtime state
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
808 def _ExpandItemName(fn, name, kind):
809 """Expand an item name.
811 @param fn: the function to use for expansion
812 @param name: requested item name
813 @param kind: text description ('Node' or 'Instance')
814 @return: the resolved (full) name
815 @raise errors.OpPrereqError: if the item is not found
819 if full_name is None:
820 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
835 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
836 memory, vcpus, nics, disk_template, disks,
837 bep, hvp, hypervisor_name):
838 """Builds instance related env variables for hooks
840 This builds the hook environment from individual variables.
843 @param name: the name of the instance
844 @type primary_node: string
845 @param primary_node: the name of the instance's primary node
846 @type secondary_nodes: list
847 @param secondary_nodes: list of secondary nodes as strings
848 @type os_type: string
849 @param os_type: the name of the instance's OS
850 @type status: boolean
851 @param status: the should_run status of the instance
853 @param memory: the memory size of the instance
855 @param vcpus: the count of VCPUs the instance has
857 @param nics: list of tuples (ip, mac, mode, link) representing
858 the NICs the instance has
859 @type disk_template: string
860 @param disk_template: the disk template of the instance
862 @param disks: the list of (size, mode) pairs
864 @param bep: the backend parameters for the instance
866 @param hvp: the hypervisor parameters for the instance
867 @type hypervisor_name: string
868 @param hypervisor_name: the hypervisor for the instance
870 @return: the hook environment for this instance
879 "INSTANCE_NAME": name,
880 "INSTANCE_PRIMARY": primary_node,
881 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
882 "INSTANCE_OS_TYPE": os_type,
883 "INSTANCE_STATUS": str_status,
884 "INSTANCE_MEMORY": memory,
885 "INSTANCE_VCPUS": vcpus,
886 "INSTANCE_DISK_TEMPLATE": disk_template,
887 "INSTANCE_HYPERVISOR": hypervisor_name,
891 nic_count = len(nics)
892 for idx, (ip, mac, mode, link) in enumerate(nics):
895 env["INSTANCE_NIC%d_IP" % idx] = ip
896 env["INSTANCE_NIC%d_MAC" % idx] = mac
897 env["INSTANCE_NIC%d_MODE" % idx] = mode
898 env["INSTANCE_NIC%d_LINK" % idx] = link
899 if mode == constants.NIC_MODE_BRIDGED:
900 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
904 env["INSTANCE_NIC_COUNT"] = nic_count
907 disk_count = len(disks)
908 for idx, (size, mode) in enumerate(disks):
909 env["INSTANCE_DISK%d_SIZE" % idx] = size
910 env["INSTANCE_DISK%d_MODE" % idx] = mode
914 env["INSTANCE_DISK_COUNT"] = disk_count
916 for source, kind in [(bep, "BE"), (hvp, "HV")]:
917 for key, value in source.items():
918 env["INSTANCE_%s_%s" % (kind, key)] = value
923 def _NICListToTuple(lu, nics):
924 """Build a list of nic information tuples.
926 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
927 value in LUQueryInstanceData.
929 @type lu: L{LogicalUnit}
930 @param lu: the logical unit on whose behalf we execute
931 @type nics: list of L{objects.NIC}
932 @param nics: list of nics to convert to hooks tuples
936 cluster = lu.cfg.GetClusterInfo()
940 filled_params = cluster.SimpleFillNIC(nic.nicparams)
941 mode = filled_params[constants.NIC_MODE]
942 link = filled_params[constants.NIC_LINK]
943 hooks_nics.append((ip, mac, mode, link))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    # Promoted nodes need to be re-added to the cluster context
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
1001 def _DecideSelfPromotion(lu, exceptions=None):
1002 """Decide whether I should promote myself as a master candidate.
1005 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1006 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1007 # the new node will increase mc_max with one, so:
1008 mc_should = min(mc_should + 1, cp_size)
1009 return mc_now < mc_should
1012 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1013 """Check that the brigdes needed by a list of nics exist.
1016 cluster = lu.cfg.GetClusterInfo()
1017 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1018 brlist = [params[constants.NIC_LINK] for params in paramslist
1019 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1021 result = lu.rpc.call_bridges_exist(target_node, brlist)
1022 result.Raise("Error checking bridges on destination node '%s'" %
1023 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  # NOTE: docstring previously misspelled "brigdes"
  if node is None:
    # Default to checking on the instance's primary node
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
1035 def _CheckOSVariant(os_obj, name):
1036 """Check whether an OS name conforms to the os variants specification.
1038 @type os_obj: L{objects.OS}
1039 @param os_obj: OS object to check
1041 @param name: OS name passed by the user, to check for validity
1044 if not os_obj.supported_variants:
1047 variant = name.split("+", 1)[1]
1049 raise errors.OpPrereqError("OS name must include a variant",
1052 if variant not in os_obj.supported_variants:
1053 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1056 def _GetNodeInstancesInner(cfg, fn):
1057 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  # All other storage types take no extra arguments
  return []
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Returns the indices of the instance's disks that are faulty on a node."""
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    # post-init hooks run only on the master node
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do; the hooks runner does all the work.

    """
    return True
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      # Hook failures must not abort cluster destruction; warn instead
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      # Keep a backup of the keys we are about to invalidate
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master
def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file
  @return: a tuple of (error type or C{None}, message); the error type
      is one of the LUVerifyCluster ETYPE_* values

  """
  cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                         utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    # unparseable/unreadable certificate is a hard error
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # NOTE(review): errcode/msg presumably unpack the result of this call;
  # the assignment target line is not visible in this listing -- confirm.
  utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                              constants.SSL_CERT_EXPIRATION_ERROR)

  fnamemsg = "While verifying %s: %s" % (filename, msg)
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  # any other error code is a programming error
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  # NOTE(review): these descriptors appear to belong to an opcode parameter
  # list whose opening line is not visible in this listing -- confirm.
    ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", _TBool),
    ("error_codes", _TBool),
    ("debug_simulate_errors", _TBool),

  # Error "item types": first element of each (type, code) pair below;
  # NOTE(review): a TNODE constant is referenced below but its definition
  # is not visible in this listing -- confirm.
  TCLUSTER = "cluster"
  TINSTANCE = "instance"

  # Error codes, as (item type, code) pairs consumed by _Error/_ErrorIf
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  # NOTE(review): exact duplicate of the line above -- one of the two was
  # probably meant to define a different error code; confirm and fix.
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # key under which the severity is passed in _ErrorIf kwargs, and the
  # two supported severities
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      # NOTE(review): several documented fields (name, volumes, instances,
      # pinst, sinst, sbp, mfree, dfree, ghost, oslist) are not initialized
      # in the lines visible here -- confirm the elided initialization.
      self.offline = offline
      # failure flags default to False; the _Update* methods flip them
      # pessimistically while gathering runtime data
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.os_fail = False
1324 def ExpandNames(self):
1325 self.needed_locks = {
1326 locking.LEVEL_NODE: locking.ALL_SET,
1327 locking.LEVEL_INSTANCE: locking.ALL_SET,
1329 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    # severity defaults to ERROR unless overridden via the ETYPE_FIELD kwarg
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    # first complete the msg
    # then format the whole message
    if self.op.error_codes:
      # machine-parseable colon-separated format
      # NOTE(review): etxt/itype presumably come from unpacking the
      # (type, code) pair in ecode; the binding is not visible here.
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
      # human-readable format
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)
  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    # debug_simulate_errors forces every check to report, for testing
    cond = bool(cond) or self.op.debug_simulate_errors
    self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

    - compares ganeti version
    - checks vg existence and size > 20G
    - checks config file checksum
    - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    # NOTE(review): "node" below (the node name, presumably ninfo.name) is
    # bound in code elided from this listing.
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")

    # protocol version mismatch is a hard incompatibility
    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])

    # node seems compatible, we can actually try to look into its results

    # full package version; a mere software-version mismatch is only a warning
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # per-hypervisor verification results (None means "no problem found")
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # node setup check results (a non-empty value means errors)
    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
      # MergeTime raises on malformed input, handled just below
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")

    # the node time must fall within [start, end] of the verify RPC,
    # allowing for the configured maximum clock skew on either side
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data (volume groups and physical volumes).

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    # NOTE(review): "test" used below is bound in code elided here
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      # CheckVolumeGroupSize returns an error message, or None if OK
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity (ssh, tcp and master IP).

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # ssh connectivity to the other nodes
    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
      # a non-empty result maps failed peer name -> error message
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    # tcp connectivity to the other nodes
    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    # reachability of the master IP
    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    # map of node -> volumes this instance should have there
    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      # instance configured up must actually run on its primary node
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",

    # the instance must not be found running on any non-primary node
    for node, n_img in node_image.items():
      if (not node == node_current):
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
      for volume in n_img.volumes:
        # a volume is orphaned if no configured instance should have it here
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
1603 def _VerifyOrphanInstances(self, instancelist, node_image):
1604 """Verify the list of running instances.
1606 This checks what instances are running but unknown to the cluster.
1609 for node, n_img in node_image.items():
1610 for o_inst in n_img.instances:
1611 test = o_inst not in instancelist
1612 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1613 "instance %s on node %s should not exist", o_inst, node)
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        # NOTE(review): needed_mem accumulation below implies an elided
        # "needed_mem = 0" reset per peer node -- confirm.
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        # NOTE(review): message reads "not enough memory on to accommodate";
        # the wording looks garbled -- confirm intended text before changing
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory on to accommodate"
                      " failovers should peer node %s fail", prinode)
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")

    for file_name in file_list:
      # master-only files are required only on master candidates
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors this node is expected to have in use;
    # node_drbd maps minor -> (instance name, should-be-active flag)
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        node_drbd[minor] = (instance, False)
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
      # we cannot check drbd status

    # every minor that must exist should be in use on the node...
    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    # ...and every used minor should be known to the configuration
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # each OS entry must be a 7-element list
    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(remote_os,
                           lambda v: isinstance(v, list) and len(v) == 7))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    # group the per-path OS entries by OS name
    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      # only the first entry is authoritative; later ones are shadowed
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",

      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
        # base OS is invalid, skipping
      # API versions, variants and parameters must match the reference node
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # pessimistic default: assume LVM data is bad until proven otherwise
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    # a string result carries the node-side error message
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
      # a dict result is valid volume data
      nimg.volumes = lvdata
      nimg.lvm_fail = False
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
      nimg.hyp_fail = True
      nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
    # expose each node's tags to the hooks as well
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    # gather the configuration data needed for all checks
    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    # build the per-node verification request
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
    # LVM-related checks are requested only when a VG is configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node: referenced by an instance but not in the node list
          gnode = self.NodeImage(name=nname)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      nimg = node_image[node]
        feedback_fn("* Skipping offline node %s" % (node,))

      # classify the node for the feedback message
      if node == master_node:
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
        nimg.rpc_fail = True

      nresult = all_nvinfo[node].payload

      # run per-node verifications and update the node image with the
      # runtime data returned by the node
      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
      self._UpdateNodeOS(node_i, nresult, nimg)
      if not nimg.os_fail:
        if refos_img is None:
        self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image)
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_image)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    # summary notices
    feedback_fn("* Other Notes")
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)

      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        # NOTE(review): "msg" used below (presumably the RPC failure
        # message of res) is bound in code elided from this listing.
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
            # re-indent the script output under its header line
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
# NOTE(review): line-sampled listing — interior lines (e.g. the `continue`
# statements, loop headers, nv_dict initialisation, final `return`) are missing
# from view; only visible behavior is documented below.
2216 class LUVerifyDisks(NoHooksLU):
2217 """Verifies the cluster disks status.
# Lock everything in shared mode: this LU only reads cluster state.
2223 def ExpandNames(self):
2224 self.needed_locks = {
2225 locking.LEVEL_NODE: locking.ALL_SET,
2226 locking.LEVEL_INSTANCE: locking.ALL_SET,
2228 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2230 def Exec(self, feedback_fn):
2231 """Verify integrity of cluster disks.
2233 @rtype: tuple of three items
2234 @return: a tuple of (dict of node-to-node_error, list of instances
2235 which need activate-disks, dict of instance: (node, volume) for
# `result` aliases the three accumulators so they can be returned together.
2239 result = res_nodes, res_instances, res_missing = {}, [], {}
2241 vg_name = self.cfg.GetVGName()
2242 nodes = utils.NiceSort(self.cfg.GetNodeList())
2243 instances = [self.cfg.GetInstanceInfo(name)
2244 for name in self.cfg.GetInstanceList()]
# Collect LVs only for running, network-mirrored (DRBD) instances.
2247 for inst in instances:
2249 if (not inst.admin_up or
2250 inst.disk_template not in constants.DTS_NET_MIRROR):
2252 inst.MapLVsByNode(inst_lvs)
2253 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2254 for node, vol_list in inst_lvs.iteritems():
2255 for vol in vol_list:
2256 nv_dict[(node, vol)] = inst
# Ask every node which LVs it actually has in the cluster VG.
2261 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2265 node_res = node_lvs[node]
2266 if node_res.offline:
2268 msg = node_res.fail_msg
2270 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2271 res_nodes[node] = msg
# Payload maps lv_name -> (attrs, attrs, online-flag); offline LVs mean the
# owning instance needs activate-disks.
2274 lvs = node_res.payload
2275 for lv_name, (_, _, lv_online) in lvs.items():
2276 inst = nv_dict.pop((node, lv_name), None)
2277 if (not lv_online and inst is not None
2278 and inst.name not in res_instances):
2279 res_instances.append(inst.name)
2281 # any leftover items in nv_dict are missing LVs, let's arrange the
2283 for key, inst in nv_dict.iteritems():
2284 if inst.name not in res_missing:
2285 res_missing[inst.name] = []
2286 res_missing[inst.name].append(key)
# NOTE(review): line-sampled listing — several statements (e.g. `else:`
# branches, `continue`s, per_node_disks/changed initialisation, the inner
# `for dsk in newl:` loop header, the final return) are missing from view.
2291 class LURepairDiskSizes(NoHooksLU):
2292 """Verifies the cluster disks sizes.
2295 _OP_REQP = [("instances", _TListOf(_TNonEmptyString))]
# If an instance list was given, expand the names and lock only those
# instances; otherwise (missing else-branch) lock everything.
2298 def ExpandNames(self):
2299 if self.op.instances:
2300 self.wanted_names = []
2301 for name in self.op.instances:
2302 full_name = _ExpandInstanceName(self.cfg, name)
2303 self.wanted_names.append(full_name)
2304 self.needed_locks = {
2305 locking.LEVEL_NODE: [],
2306 locking.LEVEL_INSTANCE: self.wanted_names,
2308 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2310 self.wanted_names = None
2311 self.needed_locks = {
2312 locking.LEVEL_NODE: locking.ALL_SET,
2313 locking.LEVEL_INSTANCE: locking.ALL_SET,
2315 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
# Only primary nodes matter for size queries, hence primary_only=True.
2317 def DeclareLocks(self, level):
2318 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2319 self._LockInstancesNodes(primary_only=True)
2321 def CheckPrereq(self):
2322 """Check prerequisites.
2324 This only checks the optional instance list against the existing names.
2327 if self.wanted_names is None:
2328 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2330 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2331 in self.wanted_names]
2333 def _EnsureChildSizes(self, disk):
2334 """Ensure children of the disk have the needed disk size.
2336 This is valid mainly for DRBD8 and fixes an issue where the
2337 children have smaller disk size.
2339 @param disk: an L{ganeti.objects.Disk} object
# Grow the data child (children[0]) up to the parent size; returns whether
# anything changed so the caller knows to persist the config.
2342 if disk.dev_type == constants.LD_DRBD8:
2343 assert disk.children, "Empty children for DRBD8?"
2344 fchild = disk.children[0]
2345 mismatch = fchild.size < disk.size
2347 self.LogInfo("Child disk has size %d, parent %d, fixing",
2348 fchild.size, disk.size)
2349 fchild.size = disk.size
2351 # and we recurse on this child only, not on the metadev
2352 return self._EnsureChildSizes(fchild) or mismatch
2356 def Exec(self, feedback_fn):
2357 """Verify the size of cluster disks.
2360 # TODO: check child disks too
2361 # TODO: check differences in size between primary/secondary nodes
# Group (instance, disk-index, disk) triples by primary node so sizes can
# be fetched with one RPC per node.
2363 for instance in self.wanted_instances:
2364 pnode = instance.primary_node
2365 if pnode not in per_node_disks:
2366 per_node_disks[pnode] = []
2367 for idx, disk in enumerate(instance.disks):
2368 per_node_disks[pnode].append((instance, idx, disk))
2371 for node, dskl in per_node_disks.items():
2372 newl = [v[2].Copy() for v in dskl]
2374 self.cfg.SetDiskID(dsk, node)
2375 result = self.rpc.call_blockdev_getsizes(node, newl)
# Per-node failures are logged and skipped, not fatal.
2377 self.LogWarning("Failure in blockdev_getsizes call to node"
2378 " %s, ignoring", node)
2380 if len(result.data) != len(dskl):
2381 self.LogWarning("Invalid result from node %s, ignoring node results",
2384 for ((instance, idx, disk), size) in zip(dskl, result.data):
2386 self.LogWarning("Disk %d of instance %s did not return size"
2387 " information, ignoring", idx, instance.name)
2389 if not isinstance(size, (int, long)):
2390 self.LogWarning("Disk %d of instance %s did not return valid"
2391 " size information, ignoring", idx, instance.name)
# Recorded size differs from the actual block device size: fix the config
# (a missing line presumably assigns disk.size = size — confirm upstream).
2394 if size != disk.size:
2395 self.LogInfo("Disk %d of instance %s has mismatched size,"
2396 " correcting: recorded %d, actual %d", idx,
2397 instance.name, disk.size, size)
2399 self.cfg.Update(instance, feedback_fn)
2400 changed.append((instance.name, idx, size))
2401 if self._EnsureChildSizes(disk):
2402 self.cfg.Update(instance, feedback_fn)
2403 changed.append((instance.name, idx, disk.size))
# NOTE(review): line-sampled listing — e.g. the `env = {` opener, the
# `errors.ECODE_*` argument at 2439/2441, `ip = self.ip` before 2464, and the
# try/finally around the master-IP restart are missing from view.
2407 class LURenameCluster(LogicalUnit):
2408 """Rename the cluster.
2411 HPATH = "cluster-rename"
2412 HTYPE = constants.HTYPE_CLUSTER
2413 _OP_REQP = [("name", _TNonEmptyString)]
# Hooks run on the master node (pre) and all nodes (post).
2415 def BuildHooksEnv(self):
2420 "OP_TARGET": self.cfg.GetClusterName(),
2421 "NEW_NAME": self.op.name,
2423 mn = self.cfg.GetMasterNode()
2424 all_nodes = self.cfg.GetNodeList()
2425 return env, [mn], all_nodes
2427 def CheckPrereq(self):
2428 """Verify that the passed name is a valid one.
# Resolve the requested name; at least one of name/IP must change, and a
# new IP must not already be live on the network.
2431 hostname = utils.GetHostInfo(self.op.name)
2433 new_name = hostname.name
2434 self.ip = new_ip = hostname.ip
2435 old_name = self.cfg.GetClusterName()
2436 old_ip = self.cfg.GetMasterIP()
2437 if new_name == old_name and new_ip == old_ip:
2438 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2439 " cluster has changed",
2441 if new_ip != old_ip:
2442 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2443 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2444 " reachable on the network. Aborting." %
2445 new_ip, errors.ECODE_NOTUNIQUE)
2447 self.op.name = new_name
2449 def Exec(self, feedback_fn):
2450 """Rename the cluster.
2453 clustername = self.op.name
2456 # shutdown the master IP
2457 master = self.cfg.GetMasterNode()
2458 result = self.rpc.call_node_stop_master(master, False)
2459 result.Raise("Could not disable the master role")
# Persist the new name/IP in the cluster config.
2462 cluster = self.cfg.GetClusterInfo()
2463 cluster.cluster_name = clustername
2464 cluster.master_ip = ip
2465 self.cfg.Update(cluster, feedback_fn)
2467 # update the known hosts file
2468 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2469 node_list = self.cfg.GetNodeList()
2471 node_list.remove(master)
# Push the regenerated known_hosts to all other nodes; copy failures are
# only warned about, not fatal.
2474 result = self.rpc.call_upload_file(node_list,
2475 constants.SSH_KNOWN_HOSTS_FILE)
2476 for to_node, to_result in result.iteritems():
2477 msg = to_result.fail_msg
2479 msg = ("Copy of file %s to node %s failed: %s" %
2480 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2481 self.proc.LogWarning(msg)
# Restart the master role; on failure the admin has to do it by hand.
2484 result = self.rpc.call_node_start_master(master, False, False)
2485 msg = result.fail_msg
2487 self.LogWarning("Could not re-enable the master role on"
2488 " the master, please restart manually: %s", msg)
# NOTE(review): line-sampled listing — the early `return True` inside the
# child loop (original line ~2503) is missing from view.
2491 def _RecursiveCheckIfLVMBased(disk):
2492 """Check if the given disk or its children are lvm-based.
2494 @type disk: L{objects.Disk}
2495 @param disk: the disk to check
2497 @return: boolean indicating whether a LD_LV dev_type was found or not
# Depth-first search over the disk tree; the final check covers the disk
# itself.
2501 for chdisk in disk.children:
2502 if _RecursiveCheckIfLVMBased(chdisk):
2504 return disk.dev_type == constants.LD_LV
# NOTE(review): line-sampled listing — parts of _OP_REQP, several `else:`/
# `try:` lines, `nic_errors = []` and similar initialisers are missing from
# view; comments describe only visible behavior.
2507 class LUSetClusterParams(LogicalUnit):
2508 """Change the parameters of the cluster.
2511 HPATH = "cluster-modify"
2512 HTYPE = constants.HTYPE_CLUSTER
# Parameter type declarations (the list opener is in a missing line).
2514 ("hvparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2515 ("os_hvp", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2516 ("osparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2517 ("enabled_hypervisors",
2518 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2521 ("candidate_pool_size", None),
2524 ("remove_uids", None),
2530 def CheckArguments(self):
# Coerce and validate candidate_pool_size; must be a positive integer.
2534 if self.op.candidate_pool_size is not None:
2536 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2537 except (ValueError, TypeError), err:
2538 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2539 str(err), errors.ECODE_INVAL)
2540 if self.op.candidate_pool_size < 1:
2541 raise errors.OpPrereqError("At least one master candidate needed",
2544 _CheckBooleanOpField(self.op, "maintain_node_health")
# All three uid-pool style arguments share the same syntax check.
2546 if self.op.uid_pool:
2547 uidpool.CheckUidPool(self.op.uid_pool)
2549 if self.op.add_uids:
2550 uidpool.CheckUidPool(self.op.add_uids)
2552 if self.op.remove_uids:
2553 uidpool.CheckUidPool(self.op.remove_uids)
2555 def ExpandNames(self):
2556 # FIXME: in the future maybe other cluster params won't require checking on
2557 # all nodes to be modified.
2558 self.needed_locks = {
2559 locking.LEVEL_NODE: locking.ALL_SET,
2561 self.share_locks[locking.LEVEL_NODE] = 1
# Hooks run only on the master node.
2563 def BuildHooksEnv(self):
2568 "OP_TARGET": self.cfg.GetClusterName(),
2569 "NEW_VG_NAME": self.op.vg_name,
2571 mn = self.cfg.GetMasterNode()
2572 return env, [mn], [mn]
2574 def CheckPrereq(self):
2575 """Check prerequisites.
2577 This checks whether the given params don't conflict and
2578 if the given volume group is valid.
# Disabling LVM (vg_name set but empty) requires that no lvm-based
# instance disks exist.
2581 if self.op.vg_name is not None and not self.op.vg_name:
2582 instances = self.cfg.GetAllInstancesInfo().values()
2583 for inst in instances:
2584 for disk in inst.disks:
2585 if _RecursiveCheckIfLVMBased(disk):
2586 raise errors.OpPrereqError("Cannot disable lvm storage while"
2587 " lvm-based instances exist",
2590 node_list = self.acquired_locks[locking.LEVEL_NODE]
2592 # if vg_name not None, checks given volume group on all nodes
2594 vglist = self.rpc.call_vg_list(node_list)
2595 for node in node_list:
2596 msg = vglist[node].fail_msg
2598 # ignoring down node
2599 self.LogWarning("Error while gathering data on node %s"
2600 " (ignoring node): %s", node, msg)
2602 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2604 constants.MIN_VG_SIZE)
2606 raise errors.OpPrereqError("Error on node '%s': %s" %
2607 (node, vgstatus), errors.ECODE_ENVIRON)
2609 self.cluster = cluster = self.cfg.GetClusterInfo()
2610 # validate params changes
2611 if self.op.beparams:
2612 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2613 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2615 if self.op.nicparams:
2616 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2617 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2618 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2621 # check all instances for consistency
# Every existing NIC must still be valid under the new defaults.
2622 for instance in self.cfg.GetAllInstancesInfo().values():
2623 for nic_idx, nic in enumerate(instance.nics):
2624 params_copy = copy.deepcopy(nic.nicparams)
2625 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2627 # check parameter syntax
2629 objects.NIC.CheckParameterSyntax(params_filled)
2630 except errors.ConfigurationError, err:
2631 nic_errors.append("Instance %s, nic/%d: %s" %
2632 (instance.name, nic_idx, err))
2634 # if we're moving instances to routed, check that they have an ip
2635 target_mode = params_filled[constants.NIC_MODE]
2636 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2637 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2638 (instance.name, nic_idx))
2640 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2641 "\n".join(nic_errors))
2643 # hypervisor list/parameters
# Merge the requested hvparams on top of a copy of the current ones.
2644 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2645 if self.op.hvparams:
2646 for hv_name, hv_dict in self.op.hvparams.items():
2647 if hv_name not in self.new_hvparams:
2648 self.new_hvparams[hv_name] = hv_dict
2650 self.new_hvparams[hv_name].update(hv_dict)
2652 # os hypervisor parameters
2653 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2655 for os_name, hvs in self.op.os_hvp.items():
2656 if os_name not in self.new_os_hvp:
2657 self.new_os_hvp[os_name] = hvs
2659 for hv_name, hv_dict in hvs.items():
2660 if hv_name not in self.new_os_hvp[os_name]:
2661 self.new_os_hvp[os_name][hv_name] = hv_dict
2663 self.new_os_hvp[os_name][hv_name].update(hv_dict)
# OS parameters; empty result means the whole per-OS entry is dropped.
2666 self.new_osp = objects.FillDict(cluster.osparams, {})
2667 if self.op.osparams:
2668 for os_name, osp in self.op.osparams.items():
2669 if os_name not in self.new_osp:
2670 self.new_osp[os_name] = {}
2672 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2675 if not self.new_osp[os_name]:
2676 # we removed all parameters
2677 del self.new_osp[os_name]
2679 # check the parameter validity (remote check)
2680 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2681 os_name, self.new_osp[os_name])
2683 # changes to the hypervisor list
2684 if self.op.enabled_hypervisors is not None:
2685 self.hv_list = self.op.enabled_hypervisors
2686 for hv in self.hv_list:
2687 # if the hypervisor doesn't already exist in the cluster
2688 # hvparams, we initialize it to empty, and then (in both
2689 # cases) we make sure to fill the defaults, as we might not
2690 # have a complete defaults list if the hypervisor wasn't
2692 if hv not in new_hvp:
2694 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2695 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2697 self.hv_list = cluster.enabled_hypervisors
# Validate changed or newly-enabled hypervisors against all locked nodes.
2699 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2700 # either the enabled list has changed, or the parameters have, validate
2701 for hv_name, hv_params in self.new_hvparams.items():
2702 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2703 (self.op.enabled_hypervisors and
2704 hv_name in self.op.enabled_hypervisors)):
2705 # either this is a new hypervisor, or its parameters have changed
2706 hv_class = hypervisor.GetHypervisor(hv_name)
2707 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2708 hv_class.CheckParameterSyntax(hv_params)
2709 _CheckHVParams(self, node_list, hv_name, hv_params)
2712 # no need to check any newly-enabled hypervisors, since the
2713 # defaults have already been checked in the above code-block
2714 for os_name, os_hvp in self.new_os_hvp.items():
2715 for hv_name, hv_params in os_hvp.items():
2716 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2717 # we need to fill in the new os_hvp on top of the actual hv_p
2718 cluster_defaults = self.new_hvparams.get(hv_name, {})
2719 new_osp = objects.FillDict(cluster_defaults, hv_params)
2720 hv_class = hypervisor.GetHypervisor(hv_name)
2721 hv_class.CheckParameterSyntax(new_osp)
2722 _CheckHVParams(self, node_list, hv_name, new_osp)
2725 def Exec(self, feedback_fn):
2726 """Change the parameters of the cluster.
# Apply each requested change; everything validated in CheckPrereq.
2729 if self.op.vg_name is not None:
2730 new_volume = self.op.vg_name
2733 if new_volume != self.cfg.GetVGName():
2734 self.cfg.SetVGName(new_volume)
2736 feedback_fn("Cluster LVM configuration already in desired"
2737 " state, not changing")
2738 if self.op.hvparams:
2739 self.cluster.hvparams = self.new_hvparams
2741 self.cluster.os_hvp = self.new_os_hvp
2742 if self.op.enabled_hypervisors is not None:
2743 self.cluster.hvparams = self.new_hvparams
2744 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2745 if self.op.beparams:
2746 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2747 if self.op.nicparams:
2748 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2749 if self.op.osparams:
2750 self.cluster.osparams = self.new_osp
2752 if self.op.candidate_pool_size is not None:
2753 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2754 # we need to update the pool size here, otherwise the save will fail
2755 _AdjustCandidatePool(self, [])
2757 if self.op.maintain_node_health is not None:
2758 self.cluster.maintain_node_health = self.op.maintain_node_health
2760 if self.op.add_uids is not None:
2761 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2763 if self.op.remove_uids is not None:
2764 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2766 if self.op.uid_pool is not None:
2767 self.cluster.uid_pool = self.op.uid_pool
# Persist all accumulated changes in one config write.
2769 self.cfg.Update(self.cluster, feedback_fn)
# NOTE(review): line-sampled listing — e.g. the closing of the dist_files set
# literal and the `if msg:` guard before 2812 are missing from view.
2772 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2773 """Distribute additional files which are part of the cluster configuration.
2775 ConfigWriter takes care of distributing the config and ssconf files, but
2776 there are more files which should be distributed to all nodes. This function
2777 makes sure those are copied.
2779 @param lu: calling logical unit
2780 @param additional_nodes: list of nodes not in the config to distribute to
2783 # 1. Gather target nodes
# The master copies the files, so it is excluded from the target list.
2784 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2785 dist_nodes = lu.cfg.GetOnlineNodeList()
2786 if additional_nodes is not None:
2787 dist_nodes.extend(additional_nodes)
2788 if myself.name in dist_nodes:
2789 dist_nodes.remove(myself.name)
2791 # 2. Gather files to distribute
2792 dist_files = set([constants.ETC_HOSTS,
2793 constants.SSH_KNOWN_HOSTS_FILE,
2794 constants.RAPI_CERT_FILE,
2795 constants.RAPI_USERS_FILE,
2796 constants.CONFD_HMAC_KEY,
2797 constants.CLUSTER_DOMAIN_SECRET_FILE,
# Each enabled hypervisor may contribute extra ancillary files.
2800 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2801 for hv_name in enabled_hypervisors:
2802 hv_class = hypervisor.GetHypervisor(hv_name)
2803 dist_files.update(hv_class.GetAncillaryFiles())
2805 # 3. Perform the files upload
# Optional files (e.g. RAPI users) may not exist locally, hence the check.
2806 for fname in dist_files:
2807 if os.path.exists(fname):
2808 result = lu.rpc.call_upload_file(dist_nodes, fname)
2809 for to_node, to_result in result.items():
2810 msg = to_result.fail_msg
2812 msg = ("Copy of file %s to node %s failed: %s" %
2813 (fname, to_node, msg))
2814 lu.proc.LogWarning(msg)
# NOTE(review): line-sampled listing — class attributes between the docstring
# and ExpandNames (original lines 2821-2825) are missing from view.
2817 class LURedistributeConfig(NoHooksLU):
2818 """Force the redistribution of cluster configuration.
2820 This is a very simple LU.
# Shared lock on all nodes: we only push files, never modify node state.
2826 def ExpandNames(self):
2827 self.needed_locks = {
2828 locking.LEVEL_NODE: locking.ALL_SET,
2830 self.share_locks[locking.LEVEL_NODE] = 1
2832 def Exec(self, feedback_fn):
2833 """Redistribute the configuration.
# Touching the cluster config triggers a full config/ssconf push; ancillary
# files are then distributed explicitly.
2836 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2837 _RedistributeAncillaryFiles(self)
# NOTE(review): line-sampled listing — the retry bookkeeping (`retries`,
# `max_time`, `done` initialisation), the `while True:` loop header and several
# guard lines are missing from view; comments cover only visible lines.
2840 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2841 """Sleep and poll for an instance's disk to sync.
# Nothing to wait for when there are no disks (or an empty explicit list).
2844 if not instance.disks or disks is not None and not disks:
2847 disks = _ExpandCheckDisks(instance, disks)
2850 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
# All mirror status queries go to the primary node.
2852 node = instance.primary_node
2855 lu.cfg.SetDiskID(dev, node)
2857 # TODO: Convert to utils.Retry
2860 degr_retries = 10 # in seconds, as we sleep 1 second each time
2864 cumul_degraded = False
2865 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2866 msg = rstats.fail_msg
2868 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
# After repeated RPC failures (counter decrement in a missing line) we
# give up with a hard error.
2871 raise errors.RemoteError("Can't contact node %s for mirror data,"
2872 " aborting." % node)
2875 rstats = rstats.payload
2877 for i, mstat in enumerate(rstats):
2879 lu.LogWarning("Can't compute data for node %s/%s",
2880 node, disks[i].iv_name)
# Degraded with no sync progress reported counts as still degraded.
2883 cumul_degraded = (cumul_degraded or
2884 (mstat.is_degraded and mstat.sync_percent is None))
2885 if mstat.sync_percent is not None:
2887 if mstat.estimated_time is not None:
2888 rem_time = ("%s remaining (estimated)" %
2889 utils.FormatSeconds(mstat.estimated_time))
2890 max_time = mstat.estimated_time
2892 rem_time = "no time estimate"
2893 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2894 (disks[i].iv_name, mstat.sync_percent, rem_time))
2896 # if we're done but degraded, let's do a few small retries, to
2897 # make sure we see a stable and not transient situation; therefore
2898 # we force restart of the loop
2899 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2900 logging.info("Degraded disks found, %d retries left", degr_retries)
# Sleep is bounded by the estimated remaining time, capped at 60s.
2908 time.sleep(min(60, max_time))
2911 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2912 return not cumul_degraded
# NOTE(review): line-sampled listing — the `result = True` initialiser, the
# `if ldisk:`/`else:` guards around 2938/2940 and the final `return result`
# are missing from view.
2915 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2916 """Check that mirrors are not degraded.
2918 The ldisk parameter, if True, will change the test from the
2919 is_degraded attribute (which represents overall non-ok status for
2920 the device(s)) to the ldisk (representing the local storage status).
2923 lu.cfg.SetDiskID(dev, node)
# Only query devices that can actually be assembled on this node.
2927 if on_primary or dev.AssembleOnSecondary():
2928 rstats = lu.rpc.call_blockdev_find(node, dev)
2929 msg = rstats.fail_msg
2931 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2933 elif not rstats.payload:
2934 lu.LogWarning("Can't find disk on node %s", node)
# ldisk mode checks local-storage status; otherwise overall degradation.
2938 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2940 result = result and not rstats.payload.is_degraded
# Recurse into children (e.g. DRBD data/meta devices); note ldisk is not
# propagated to children here.
2943 for child in dev.children:
2944 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
# NOTE(review): line-sampled listing — e.g. the `if self.op.names:` guard
# before 2964, `all_os = {}` initialisation, `continue` statements and the
# output-assembly lines in Exec are missing from view.
2949 class LUDiagnoseOS(NoHooksLU):
2950 """Logical unit for OS diagnose/query.
2954 ("output_fields", _TListOf(_TNonEmptyString)),
2955 ("names", _TListOf(_TNonEmptyString)),
2958 _FIELDS_STATIC = utils.FieldSet()
2959 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2960 "parameters", "api_versions")
# Selecting specific OS names is not implemented; only field validation.
2962 def CheckArguments(self):
2964 raise errors.OpPrereqError("Selective OS query not supported",
2967 _CheckOutputFields(static=self._FIELDS_STATIC,
2968 dynamic=self._FIELDS_DYNAMIC,
2969 selected=self.op.output_fields)
2971 def ExpandNames(self):
2972 # Lock all nodes, in shared mode
2973 # Temporary removal of locks, should be reverted later
2974 # TODO: reintroduce locks when they are lighter-weight
2975 self.needed_locks = {}
2976 #self.share_locks[locking.LEVEL_NODE] = 1
2977 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2980 def _DiagnoseByOS(rlist):
2981 """Remaps a per-node return list into an a per-os per-node dictionary
2983 @param rlist: a map with node names as keys and OS objects as values
2986 @return: a dictionary with osnames as keys and as value another
2987 map, with nodes as keys and tuples of (path, status, diagnose,
2988 variants, parameters, api_versions) as values, eg::
2990 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2991 (/srv/..., False, "invalid api")],
2992 "node2": [(/srv/..., True, "", [], [])]}
2997 # we build here the list of nodes that didn't fail the RPC (at RPC
2998 # level), so that nodes with a non-responding node daemon don't
2999 # make all OSes invalid
3000 good_nodes = [node_name for node_name in rlist
3001 if not rlist[node_name].fail_msg]
3002 for node_name, nr in rlist.items():
3003 if nr.fail_msg or not nr.payload:
3005 for (name, path, status, diagnose, variants,
3006 params, api_versions) in nr.payload:
3007 if name not in all_os:
3008 # build a list of nodes for this os containing empty lists
3009 # for each node in node_list
3011 for nname in good_nodes:
3012 all_os[name][nname] = []
3013 # convert params from [name, help] to (name, help)
3014 params = [tuple(v) for v in params]
3015 all_os[name][node_name].append((path, status, diagnose,
3016 variants, params, api_versions))
3019 def Exec(self, feedback_fn):
3020 """Compute the list of OSes.
3023 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3024 node_data = self.rpc.call_os_diagnose(valid_nodes)
3025 pol = self._DiagnoseByOS(node_data)
# For each OS, compute validity and intersect variants/params/api versions
# across all nodes so only universally-supported values are reported.
3028 for os_name, os_data in pol.items():
3031 (variants, params, api_versions) = null_state = (set(), set(), set())
3032 for idx, osl in enumerate(os_data.values()):
3033 valid = bool(valid and osl and osl[0][1])
3035 (variants, params, api_versions) = null_state
3037 node_variants, node_params, node_api = osl[0][3:6]
3038 if idx == 0: # first entry
3039 variants = set(node_variants)
3040 params = set(node_params)
3041 api_versions = set(node_api)
3042 else: # keep consistency
3043 variants.intersection_update(node_variants)
3044 params.intersection_update(node_params)
3045 api_versions.intersection_update(node_api)
# Build the per-OS output row field by field.
3047 for field in self.op.output_fields:
3050 elif field == "valid":
3052 elif field == "node_status":
3053 # this is just a copy of the dict
3055 for node_name, nos_list in os_data.items():
3056 val[node_name] = nos_list
3057 elif field == "variants":
3058 val = list(variants)
3059 elif field == "parameters":
3061 elif field == "api_versions":
3062 val = list(api_versions)
3064 raise errors.ParameterError(field)
# NOTE(review): line-sampled listing — e.g. the `env = {` opener, the
# try/except around node-name removal, and the try/except around the hooks
# run are missing from view.
3071 class LURemoveNode(LogicalUnit):
3072 """Logical unit for removing a node.
3075 HPATH = "node-remove"
3076 HTYPE = constants.HTYPE_NODE
3077 _OP_REQP = [("node_name", _TNonEmptyString)]
3079 def BuildHooksEnv(self):
3082 This doesn't run on the target node in the pre phase as a failed
3083 node would then be impossible to remove.
3087 "OP_TARGET": self.op.node_name,
3088 "NODE_NAME": self.op.node_name,
3090 all_nodes = self.cfg.GetNodeList()
3092 all_nodes.remove(self.op.node_name)
3094 logging.warning("Node %s which is about to be removed not found"
3095 " in the all nodes list", self.op.node_name)
3096 return env, all_nodes, all_nodes
3098 def CheckPrereq(self):
3099 """Check prerequisites.
3102 - the node exists in the configuration
3103 - it does not have primary or secondary instances
3104 - it's not the master
3106 Any errors are signaled by raising errors.OpPrereqError.
3109 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3110 node = self.cfg.GetNodeInfo(self.op.node_name)
3111 assert node is not None
3113 instance_list = self.cfg.GetInstanceList()
# Refuse to remove the master or any node still hosting instances.
3115 masternode = self.cfg.GetMasterNode()
3116 if node.name == masternode:
3117 raise errors.OpPrereqError("Node is the master node,"
3118 " you need to failover first.",
3121 for instance_name in instance_list:
3122 instance = self.cfg.GetInstanceInfo(instance_name)
3123 if node.name in instance.all_nodes:
3124 raise errors.OpPrereqError("Instance %s is still running on the node,"
3125 " please remove first." % instance_name,
3127 self.op.node_name = node.name
3130 def Exec(self, feedback_fn):
3131 """Removes the node from the cluster.
3135 logging.info("Stopping the node daemon and removing configs from node %s",
3138 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3140 # Promote nodes to master candidate as needed
3141 _AdjustCandidatePool(self, exceptions=[node.name])
3142 self.context.RemoveNode(node.name)
3144 # Run post hooks on the node before it's removed
3145 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3147 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3149 # pylint: disable-msg=W0702
3150 self.LogWarning("Errors occurred running hooks on %s" % node.name)
# Tell the node daemon to leave the cluster; best-effort, only warns.
3152 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3153 msg = result.fail_msg
3155 self.LogWarning("Errors encountered on the remote node while leaving"
3156 " the cluster: %s", msg)
3158 # Remove node from our /etc/hosts
3159 if self.cfg.GetClusterInfo().modify_etc_hosts:
3160 # FIXME: this should be done via an rpc call to node daemon
3161 utils.RemoveHostFromEtcHosts(node.name)
3162 _RedistributeAncillaryFiles(self)
# NOTE(review): line-sampled listing — e.g. the _OP_REQP opener, parts of the
# dynamic field set, `if self.op.names:` / `if self.do_locking:` guards and
# output-list initialisers are missing from view.
3165 class LUQueryNodes(NoHooksLU):
3166 """Logical unit for querying nodes.
3169 # pylint: disable-msg=W0142
3171 ("output_fields", _TListOf(_TNonEmptyString)),
3172 ("names", _TListOf(_TNonEmptyString)),
3173 ("use_locking", _TBool),
# Fields read directly off the node object via getattr.
3177 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3178 "master_candidate", "offline", "drained"]
3180 _FIELDS_DYNAMIC = utils.FieldSet(
3182 "mtotal", "mnode", "mfree",
3184 "ctotal", "cnodes", "csockets",
3187 _FIELDS_STATIC = utils.FieldSet(*[
3188 "pinst_cnt", "sinst_cnt",
3189 "pinst_list", "sinst_list",
3190 "pip", "sip", "tags",
3192 "role"] + _SIMPLE_FIELDS
3195 def CheckArguments(self):
3196 _CheckOutputFields(static=self._FIELDS_STATIC,
3197 dynamic=self._FIELDS_DYNAMIC,
3198 selected=self.op.output_fields)
3200 def ExpandNames(self):
3201 self.needed_locks = {}
3202 self.share_locks[locking.LEVEL_NODE] = 1
3205 self.wanted = _GetWantedNodes(self, self.op.names)
3207 self.wanted = locking.ALL_SET
# Locking is only needed when non-static fields were requested AND the
# caller asked for it.
3209 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3210 self.do_locking = self.do_node_query and self.op.use_locking
3212 # if we don't request only static fields, we need to lock the nodes
3213 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3215 def Exec(self, feedback_fn):
3216 """Computes the list of nodes and their attributes.
3219 all_info = self.cfg.GetAllNodesInfo()
3221 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3222 elif self.wanted != locking.ALL_SET:
3223 nodenames = self.wanted
# Nodes removed between ExpandNames and Exec are a hard error here.
3224 missing = set(nodenames).difference(all_info.keys())
3226 raise errors.OpExecError(
3227 "Some nodes were removed before retrieving their data: %s" % missing)
3229 nodenames = all_info.keys()
3231 nodenames = utils.NiceSort(nodenames)
3232 nodelist = [all_info[name] for name in nodenames]
3234 # begin data gathering
# Live data (memory, disk, cpu) comes from a node_info RPC; entries for
# unreachable nodes become empty dicts.
3236 if self.do_node_query:
3238 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3239 self.cfg.GetHypervisorType())
3240 for name in nodenames:
3241 nodeinfo = node_data[name]
3242 if not nodeinfo.fail_msg and nodeinfo.payload:
3243 nodeinfo = nodeinfo.payload
3244 fn = utils.TryConvert
3246 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3247 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3248 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3249 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3250 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3251 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3252 "bootid": nodeinfo.get('bootid', None),
3253 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3254 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3257 live_data[name] = {}
3259 live_data = dict.fromkeys(nodenames, {})
# Instance relations are only computed when pinst_*/sinst_* fields were
# requested, since GetAllInstancesInfo is expensive.
3261 node_to_primary = dict([(name, set()) for name in nodenames])
3262 node_to_secondary = dict([(name, set()) for name in nodenames])
3264 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3265 "sinst_cnt", "sinst_list"))
3266 if inst_fields & frozenset(self.op.output_fields):
3267 inst_data = self.cfg.GetAllInstancesInfo()
3269 for inst in inst_data.values():
3270 if inst.primary_node in node_to_primary:
3271 node_to_primary[inst.primary_node].add(inst.name)
3272 for secnode in inst.secondary_nodes:
3273 if secnode in node_to_secondary:
3274 node_to_secondary[secnode].add(inst.name)
3276 master_node = self.cfg.GetMasterNode()
3278 # end data gathering
# Assemble one output row per node, field by field.
3281 for node in nodelist:
3283 for field in self.op.output_fields:
3284 if field in self._SIMPLE_FIELDS:
3285 val = getattr(node, field)
3286 elif field == "pinst_list":
3287 val = list(node_to_primary[node.name])
3288 elif field == "sinst_list":
3289 val = list(node_to_secondary[node.name])
3290 elif field == "pinst_cnt":
3291 val = len(node_to_primary[node.name])
3292 elif field == "sinst_cnt":
3293 val = len(node_to_secondary[node.name])
3294 elif field == "pip":
3295 val = node.primary_ip
3296 elif field == "sip":
3297 val = node.secondary_ip
3298 elif field == "tags":
3299 val = list(node.GetTags())
3300 elif field == "master":
3301 val = node.name == master_node
3302 elif self._FIELDS_DYNAMIC.Matches(field):
3303 val = live_data[node.name].get(field, None)
3304 elif field == "role":
3305 if node.name == master_node:
3307 elif node.master_candidate:
3316 raise errors.ParameterError(field)
3317 node_output.append(val)
3318 output.append(node_output)
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# LU that lists logical volumes present on the selected nodes, and matches
# each volume back to the instance owning it (via MapLVsByNode).
3323 class LUQueryNodeVolumes(NoHooksLU):
3324 """Logical unit for getting volumes on node(s).
3328 ("nodes", _TListOf(_TNonEmptyString)),
3329 ("output_fields", _TListOf(_TNonEmptyString)),
3332 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3333 _FIELDS_STATIC = utils.FieldSet("node")
3335 def CheckArguments(self):
3336 _CheckOutputFields(static=self._FIELDS_STATIC,
3337 dynamic=self._FIELDS_DYNAMIC,
3338 selected=self.op.output_fields)
3340 def ExpandNames(self):
# Shared node locks: this is a read-only query.
3341 self.needed_locks = {}
3342 self.share_locks[locking.LEVEL_NODE] = 1
3343 if not self.op.nodes:
3344 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3346 self.needed_locks[locking.LEVEL_NODE] = \
3347 _GetWantedNodes(self, self.op.nodes)
3349 def Exec(self, feedback_fn):
3350 """Computes the list of nodes and their attributes.
3353 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3354 volumes = self.rpc.call_node_volumes(nodenames)
3356 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3357 in self.cfg.GetInstanceList()]
# Map each instance to its per-node LV names, used below to attribute
# a found volume to its owning instance.
3359 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3362 for node in nodenames:
3363 nresult = volumes[node]
# Per-node RPC failures are only warned about; the query continues.
3366 msg = nresult.fail_msg
3368 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3371 node_vols = nresult.payload[:]
3372 node_vols.sort(key=lambda vol: vol['dev'])
3374 for vol in node_vols:
3376 for field in self.op.output_fields:
3379 elif field == "phys":
3383 elif field == "name":
3385 elif field == "size":
3386 val = int(float(vol['size']))
3387 elif field == "instance":
3389 if node not in lv_by_node[inst]:
3391 if vol['name'] in lv_by_node[inst][node]:
3397 raise errors.ParameterError(field)
# All values are stringified for output, unlike the node-query LU.
3398 node_output.append(str(val))
3400 output.append(node_output)
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# LU that queries storage units (of a given storage_type) on the selected
# nodes via the storage_list RPC, returning rows sorted by unit name.
3405 class LUQueryNodeStorage(NoHooksLU):
3406 """Logical unit for getting information on storage units on node(s).
3409 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3411 ("nodes", _TListOf(_TNonEmptyString)),
3412 ("storage_type", _CheckStorageType),
3413 ("output_fields", _TListOf(_TNonEmptyString)),
3415 _OP_DEFS = [("name", None)]
3418 def CheckArguments(self):
3419 _CheckOutputFields(static=self._FIELDS_STATIC,
3420 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3421 selected=self.op.output_fields)
3423 def ExpandNames(self):
# Shared node locks: read-only query.
3424 self.needed_locks = {}
3425 self.share_locks[locking.LEVEL_NODE] = 1
3428 self.needed_locks[locking.LEVEL_NODE] = \
3429 _GetWantedNodes(self, self.op.nodes)
3431 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3433 def Exec(self, feedback_fn):
# NOTE(review): docstring below looks copy-pasted from the node-query LU;
# this method queries storage units, not node attributes.
3434 """Computes the list of nodes and their attributes.
3437 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3439 # Always get name to sort by
3440 if constants.SF_NAME in self.op.output_fields:
3441 fields = self.op.output_fields[:]
3443 fields = [constants.SF_NAME] + self.op.output_fields
3445 # Never ask for node or type as it's only known to the LU
3446 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3447 while extra in fields:
3448 fields.remove(extra)
# Map field name -> column index in the RPC result rows.
3450 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3451 name_idx = field_idx[constants.SF_NAME]
3453 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3454 data = self.rpc.call_storage_list(self.nodes,
3455 self.op.storage_type, st_args,
3456 self.op.name, fields)
3460 for node in utils.NiceSort(self.nodes):
3461 nresult = data[node]
# Per-node failures are warned about and skipped, not fatal.
3465 msg = nresult.fail_msg
3467 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3470 rows = dict([(row[name_idx], row) for row in nresult.payload])
3472 for name in utils.NiceSort(rows.keys()):
3477 for field in self.op.output_fields:
# SF_NODE/SF_TYPE are synthesized here; everything else comes from the row.
3478 if field == constants.SF_NODE:
3480 elif field == constants.SF_TYPE:
3481 val = self.op.storage_type
3482 elif field in field_idx:
3483 val = row[field_idx[field]]
3485 raise errors.ParameterError(field)
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# LU that applies a set of field changes to one storage unit on one node,
# after validating that the storage type and fields are modifiable.
3494 class LUModifyNodeStorage(NoHooksLU):
3495 """Logical unit for modifying a storage volume on a node.
3499 ("node_name", _TNonEmptyString),
3500 ("storage_type", _CheckStorageType),
3501 ("name", _TNonEmptyString),
3502 ("changes", _TDict),
3506 def CheckArguments(self):
3507 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3509 storage_type = self.op.storage_type
# MODIFIABLE_STORAGE_FIELDS lookup; the KeyError handling around it
# appears elided — confirm against the full file.
3512 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3514 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3515 " modified" % storage_type,
# Reject any requested change field not in the modifiable set.
3518 diff = set(self.op.changes.keys()) - modifiable
3520 raise errors.OpPrereqError("The following fields can not be modified for"
3521 " storage units of type '%s': %r" %
3522 (storage_type, list(diff)),
3525 def ExpandNames(self):
3526 self.needed_locks = {
3527 locking.LEVEL_NODE: self.op.node_name,
3530 def Exec(self, feedback_fn):
# NOTE(review): docstring below looks copy-pasted from the node-query LU;
# this method modifies a storage unit.
3531 """Computes the list of nodes and their attributes.
3534 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3535 result = self.rpc.call_storage_modify(self.op.node_name,
3536 self.op.storage_type, st_args,
3537 self.op.name, self.op.changes)
3538 result.Raise("Failed to modify storage unit '%s' on %s" %
3539 (self.op.name, self.op.node_name))
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# LU that adds (or re-adds, via self.op.readd) a node to the cluster:
# validates naming/IP layout against the cluster, checks connectivity,
# distributes SSH host keys and ancillary files, and registers the node.
3542 class LUAddNode(LogicalUnit):
3543 """Logical unit for adding node to the cluster.
3547 HTYPE = constants.HTYPE_NODE
3549 ("node_name", _TNonEmptyString),
3551 _OP_DEFS = [("secondary_ip", None)]
3553 def CheckArguments(self):
3554 # validate/normalize the node name
3555 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3557 def BuildHooksEnv(self):
3560 This will run on all nodes before, and on all nodes + the new node after.
3564 "OP_TARGET": self.op.node_name,
3565 "NODE_NAME": self.op.node_name,
3566 "NODE_PIP": self.op.primary_ip,
3567 "NODE_SIP": self.op.secondary_ip,
3569 nodes_0 = self.cfg.GetNodeList()
3570 nodes_1 = nodes_0 + [self.op.node_name, ]
3571 return env, nodes_0, nodes_1
3573 def CheckPrereq(self):
3574 """Check prerequisites.
3577 - the new node is not already in the config
3579 - its parameters (single/dual homed) matches the cluster
3581 Any errors are signaled by raising errors.OpPrereqError.
3584 node_name = self.op.node_name
# Resolve the node name via DNS; primary IP comes from the lookup, the
# secondary IP defaults to the primary when not given.
3587 dns_data = utils.GetHostInfo(node_name)
3589 node = dns_data.name
3590 primary_ip = self.op.primary_ip = dns_data.ip
3591 if self.op.secondary_ip is None:
3592 self.op.secondary_ip = primary_ip
3593 if not utils.IsValidIP(self.op.secondary_ip):
3594 raise errors.OpPrereqError("Invalid secondary IP given",
3596 secondary_ip = self.op.secondary_ip
3598 node_list = cfg.GetNodeList()
# Membership checks differ for add vs. re-add.
3599 if not self.op.readd and node in node_list:
3600 raise errors.OpPrereqError("Node %s is already in the configuration" %
3601 node, errors.ECODE_EXISTS)
3602 elif self.op.readd and node not in node_list:
3603 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3606 self.changed_primary_ip = False
# Scan existing nodes: a re-added node must keep its secondary IP (a
# primary-IP change is tracked and applied later); for any other node the
# new IPs must not collide with existing primary/secondary IPs.
3608 for existing_node_name in node_list:
3609 existing_node = cfg.GetNodeInfo(existing_node_name)
3611 if self.op.readd and node == existing_node_name:
3612 if existing_node.secondary_ip != secondary_ip:
3613 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3614 " address configuration as before",
3616 if existing_node.primary_ip != primary_ip:
3617 self.changed_primary_ip = True
3621 if (existing_node.primary_ip == primary_ip or
3622 existing_node.secondary_ip == primary_ip or
3623 existing_node.primary_ip == secondary_ip or
3624 existing_node.secondary_ip == secondary_ip):
3625 raise errors.OpPrereqError("New node ip address(es) conflict with"
3626 " existing node %s" % existing_node.name,
3627 errors.ECODE_NOTUNIQUE)
3629 # check that the type of the node (single versus dual homed) is the
3630 # same as for the master
3631 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3632 master_singlehomed = myself.secondary_ip == myself.primary_ip
3633 newbie_singlehomed = secondary_ip == primary_ip
3634 if master_singlehomed != newbie_singlehomed:
3635 if master_singlehomed:
3636 raise errors.OpPrereqError("The master has no private ip but the"
3637 " new node has one",
3640 raise errors.OpPrereqError("The master has a private ip but the"
3641 " new node doesn't have one",
3644 # checks reachability
3645 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3646 raise errors.OpPrereqError("Node not reachable by ping",
3647 errors.ECODE_ENVIRON)
3649 if not newbie_singlehomed:
3650 # check reachability from my secondary ip to newbie's secondary ip
3651 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3652 source=myself.secondary_ip):
3653 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3654 " based ping to noded port",
3655 errors.ECODE_ENVIRON)
# Decide master-candidate status; `exceptions` is built on elided lines —
# confirm against the full file.
3662 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
# For re-add the existing Node object is reused; otherwise a fresh one is
# built (the if/else around these lines appears elided).
3665 self.new_node = self.cfg.GetNodeInfo(node)
3666 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3668 self.new_node = objects.Node(name=node,
3669 primary_ip=primary_ip,
3670 secondary_ip=secondary_ip,
3671 master_candidate=self.master_candidate,
3672 offline=False, drained=False)
3674 def Exec(self, feedback_fn):
3675 """Adds the new node to the cluster.
3678 new_node = self.new_node
3679 node = new_node.name
3681 # for re-adds, reset the offline/drained/master-candidate flags;
3682 # we need to reset here, otherwise offline would prevent RPC calls
3683 # later in the procedure; this also means that if the re-add
3684 # fails, we are left with a non-offlined, broken node
3686 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3687 self.LogInfo("Readding a node, the offline/drained flags were reset")
3688 # if we demote the node, we do cleanup later in the procedure
3689 new_node.master_candidate = self.master_candidate
3690 if self.changed_primary_ip:
3691 new_node.primary_ip = self.op.primary_ip
3693 # notify the user about any possible mc promotion
3694 if new_node.master_candidate:
3695 self.LogInfo("Node will be a master candidate")
3697 # check connectivity
3698 result = self.rpc.call_version([node])[node]
3699 result.Raise("Can't get version information from node %s" % node)
# Protocol versions must match exactly between master and new node.
3700 if constants.PROTOCOL_VERSION == result.payload:
3701 logging.info("Communication to node %s fine, sw version %s match",
3702 node, result.payload)
3704 raise errors.OpExecError("Version mismatch master version %s,"
3705 " node version %s" %
3706 (constants.PROTOCOL_VERSION, result.payload))
# Transfer SSH host keys (DSA + RSA pairs) to the new node, if the
# cluster manages the SSH setup.
3709 if self.cfg.GetClusterInfo().modify_ssh_setup:
3710 logging.info("Copy ssh key to node %s", node)
3711 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3713 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3714 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3718 keyarray.append(utils.ReadFile(i))
3720 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3721 keyarray[2], keyarray[3], keyarray[4],
3723 result.Raise("Cannot transfer ssh keys to the new node")
3725 # Add node to our /etc/hosts, and add key to known_hosts
3726 if self.cfg.GetClusterInfo().modify_etc_hosts:
3727 # FIXME: this should be done via an rpc call to node daemon
3728 utils.AddHostToEtcHosts(new_node.name)
# Dual-homed nodes must confirm they actually own the secondary IP.
3730 if new_node.secondary_ip != new_node.primary_ip:
3731 result = self.rpc.call_node_has_ip_address(new_node.name,
3732 new_node.secondary_ip)
3733 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3734 prereq=True, ecode=errors.ECODE_ENVIRON)
3735 if not result.payload:
3736 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3737 " you gave (%s). Please fix and re-run this"
3738 " command." % new_node.secondary_ip)
# Verify ssh/hostname reachability of the new node from the master.
3740 node_verify_list = [self.cfg.GetMasterNode()]
3741 node_verify_param = {
3742 constants.NV_NODELIST: [node],
3743 # TODO: do a node-net-test as well?
3746 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3747 self.cfg.GetClusterName())
3748 for verifier in node_verify_list:
3749 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3750 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3752 for failed in nl_payload:
3753 feedback_fn("ssh/hostname verification failed"
3754 " (checking from %s): %s" %
3755 (verifier, nl_payload[failed]))
3756 raise errors.OpExecError("ssh/hostname verification failed.")
# Re-add path: re-register and push config; new-add path (below) registers
# the node with the current execution context.
3759 _RedistributeAncillaryFiles(self)
3760 self.context.ReaddNode(new_node)
3761 # make sure we redistribute the config
3762 self.cfg.Update(new_node, feedback_fn)
3763 # and make sure the new node will not have old files around
3764 if not new_node.master_candidate:
3765 result = self.rpc.call_node_demote_from_mc(new_node.name)
3766 msg = result.fail_msg
3768 self.LogWarning("Node failed to demote itself from master"
3769 " candidate status: %s" % msg)
3771 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3772 self.context.AddNode(new_node, self.proc.GetECId())
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# LU that changes a node's role flags (master_candidate / offline / drained),
# with auto-promotion/demotion bookkeeping for the master-candidate pool.
3775 class LUSetNodeParams(LogicalUnit):
3776 """Modifies the parameters of a node.
3779 HPATH = "node-modify"
3780 HTYPE = constants.HTYPE_NODE
3781 _OP_REQP = [("node_name", _TNonEmptyString)]
3784 def CheckArguments(self):
3785 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3786 _CheckBooleanOpField(self.op, 'master_candidate')
3787 _CheckBooleanOpField(self.op, 'offline')
3788 _CheckBooleanOpField(self.op, 'drained')
3789 _CheckBooleanOpField(self.op, 'auto_promote')
# Each flag is tri-state: None (leave alone), True, False; at least one
# must be set, and at most one may be set to True.
3790 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3791 if all_mods.count(None) == 3:
3792 raise errors.OpPrereqError("Please pass at least one modification",
3794 if all_mods.count(True) > 1:
3795 raise errors.OpPrereqError("Can't set the node into more than one"
3796 " state at the same time",
3799 # Boolean value that tells us whether we're offlining or draining the node
3800 self.offline_or_drain = (self.op.offline == True or
3801 self.op.drained == True)
3802 self.deoffline_or_drain = (self.op.offline == False or
3803 self.op.drained == False)
3804 self.might_demote = (self.op.master_candidate == False or
3805 self.offline_or_drain)
# A potential demotion with auto_promote requires locking all nodes so a
# replacement candidate can be promoted.
3807 self.lock_all = self.op.auto_promote and self.might_demote
3810 def ExpandNames(self):
3812 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3814 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3816 def BuildHooksEnv(self):
3819 This runs on the master node.
3823 "OP_TARGET": self.op.node_name,
3824 "MASTER_CANDIDATE": str(self.op.master_candidate),
3825 "OFFLINE": str(self.op.offline),
3826 "DRAINED": str(self.op.drained),
3828 nl = [self.cfg.GetMasterNode(),
3832 def CheckPrereq(self):
3833 """Check prerequisites.
3835 This only checks the instance list against the existing names.
3838 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3840 if (self.op.master_candidate is not None or
3841 self.op.drained is not None or
3842 self.op.offline is not None):
3843 # we can't change the master's node flags
3844 if self.op.node_name == self.cfg.GetMasterNode():
3845 raise errors.OpPrereqError("The master role can be changed"
3846 " only via masterfailover",
# Demoting without auto_promote must not drop the candidate pool below
# the required size.
3850 if node.master_candidate and self.might_demote and not self.lock_all:
3851 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3852 # check if after removing the current node, we're missing master
3854 (mc_remaining, mc_should, _) = \
3855 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3856 if mc_remaining < mc_should:
3857 raise errors.OpPrereqError("Not enough master candidates, please"
3858 " pass auto_promote to allow promotion",
# An offline/drained node cannot become a master candidate unless the
# same opcode also clears that flag.
3861 if (self.op.master_candidate == True and
3862 ((node.offline and not self.op.offline == False) or
3863 (node.drained and not self.op.drained == False))):
3864 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3865 " to master_candidate" % node.name,
3868 # If we're being deofflined/drained, we'll MC ourself if needed
3869 if (self.deoffline_or_drain and not self.offline_or_drain and not
3870 self.op.master_candidate == True and not node.master_candidate):
3871 self.op.master_candidate = _DecideSelfPromotion(self)
3872 if self.op.master_candidate:
3873 self.LogInfo("Autopromoting node to master candidate")
3877 def Exec(self, feedback_fn):
# `result` accumulates (field, message) pairs describing what changed.
3886 if self.op.offline is not None:
3887 node.offline = self.op.offline
3888 result.append(("offline", str(self.op.offline)))
3889 if self.op.offline == True:
# Going offline implies losing master-candidate and drained status.
3890 if node.master_candidate:
3891 node.master_candidate = False
3893 result.append(("master_candidate", "auto-demotion due to offline"))
3895 node.drained = False
3896 result.append(("drained", "clear drained status due to offline"))
3898 if self.op.master_candidate is not None:
3899 node.master_candidate = self.op.master_candidate
3901 result.append(("master_candidate", str(self.op.master_candidate)))
3902 if self.op.master_candidate == False:
# Demotion is best-effort: RPC failure is only a warning.
3903 rrc = self.rpc.call_node_demote_from_mc(node.name)
3906 self.LogWarning("Node failed to demote itself: %s" % msg)
3908 if self.op.drained is not None:
3909 node.drained = self.op.drained
3910 result.append(("drained", str(self.op.drained)))
3911 if self.op.drained == True:
3912 if node.master_candidate:
3913 node.master_candidate = False
3915 result.append(("master_candidate", "auto-demotion due to drain"))
3916 rrc = self.rpc.call_node_demote_from_mc(node.name)
3919 self.LogWarning("Node failed to demote itself: %s" % msg)
3921 node.offline = False
3922 result.append(("offline", "clear offline status due to drain"))
3924 # we locked all nodes, we adjust the CP before updating this node
3926 _AdjustCandidatePool(self, [node.name])
3928 # this will trigger configuration file update, if needed
3929 self.cfg.Update(node, feedback_fn)
3931 # this will trigger job queue propagation or cleanup
3933 self.context.ReaddNode(node)
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# LU that asks a node daemon to hard powercycle its own machine; the master
# node is refused unless force is given, and no locks are taken.
3938 class LUPowercycleNode(NoHooksLU):
3939 """Powercycles a node.
3943 ("node_name", _TNonEmptyString),
3948 def CheckArguments(self):
3949 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3950 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3951 raise errors.OpPrereqError("The node is the master and the force"
3952 " parameter was not set",
3955 def ExpandNames(self):
3956 """Locking for PowercycleNode.
3958 This is a last-resort option and shouldn't block on other
3959 jobs. Therefore, we grab no locks.
3962 self.needed_locks = {}
3964 def Exec(self, feedback_fn):
3968 result = self.rpc.call_node_powercycle(self.op.node_name,
3969 self.cfg.GetHypervisorType())
3970 result.Raise("Failed to schedule the reboot")
3971 return result.payload
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# LU that returns a dict snapshot of the cluster configuration; lock-free.
3974 class LUQueryClusterInfo(NoHooksLU):
3975 """Query cluster configuration.
3981 def ExpandNames(self):
3982 self.needed_locks = {}
3984 def Exec(self, feedback_fn):
3985 """Return cluster config.
3988 cluster = self.cfg.GetClusterInfo()
3991 # Filter just for enabled hypervisors
# os_hvp is rebuilt to expose per-OS hypervisor params only for
# hypervisors that are actually enabled on the cluster.
3992 for os_name, hv_dict in cluster.os_hvp.items():
3993 os_hvp[os_name] = {}
3994 for hv_name, hv_params in hv_dict.items():
3995 if hv_name in cluster.enabled_hypervisors:
3996 os_hvp[os_name][hv_name] = hv_params
3999 "software_version": constants.RELEASE_VERSION,
4000 "protocol_version": constants.PROTOCOL_VERSION,
4001 "config_version": constants.CONFIG_VERSION,
4002 "os_api_version": max(constants.OS_API_VERSIONS),
4003 "export_version": constants.EXPORT_VERSION,
4004 "architecture": (platform.architecture()[0], platform.machine()),
4005 "name": cluster.cluster_name,
4006 "master": cluster.master_node,
# By convention the first enabled hypervisor is the default.
4007 "default_hypervisor": cluster.enabled_hypervisors[0],
4008 "enabled_hypervisors": cluster.enabled_hypervisors,
4009 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4010 for hypervisor_name in cluster.enabled_hypervisors]),
4012 "beparams": cluster.beparams,
4013 "osparams": cluster.osparams,
4014 "nicparams": cluster.nicparams,
4015 "candidate_pool_size": cluster.candidate_pool_size,
4016 "master_netdev": cluster.master_netdev,
4017 "volume_group_name": cluster.volume_group_name,
4018 "file_storage_dir": cluster.file_storage_dir,
4019 "maintain_node_health": cluster.maintain_node_health,
4020 "ctime": cluster.ctime,
4021 "mtime": cluster.mtime,
4022 "uuid": cluster.uuid,
4023 "tags": list(cluster.GetTags()),
4024 "uid_pool": cluster.uid_pool,
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# LU that returns a few individual config values by name; lock-free.
4031 class LUQueryConfigValues(NoHooksLU):
4032 """Return configuration values.
4036 _FIELDS_DYNAMIC = utils.FieldSet()
4037 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4040 def CheckArguments(self):
4041 _CheckOutputFields(static=self._FIELDS_STATIC,
4042 dynamic=self._FIELDS_DYNAMIC,
4043 selected=self.op.output_fields)
4045 def ExpandNames(self):
4046 self.needed_locks = {}
4048 def Exec(self, feedback_fn):
4049 """Dump a representation of the cluster config to the standard output.
4053 for field in self.op.output_fields:
4054 if field == "cluster_name":
4055 entry = self.cfg.GetClusterName()
4056 elif field == "master_node":
4057 entry = self.cfg.GetMasterNode()
# drain_flag/watcher_pause are read from on-disk state, not the config.
4058 elif field == "drain_flag":
4059 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4060 elif field == "watcher_pause":
4061 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4063 raise errors.ParameterError(field)
4064 values.append(entry)
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# LU that assembles (brings up) an instance's block devices on its nodes.
4068 class LUActivateInstanceDisks(NoHooksLU):
4069 """Bring up an instance's disks.
4072 _OP_REQP = [("instance_name", _TNonEmptyString)]
4073 _OP_DEFS = [("ignore_size", False)]
4076 def ExpandNames(self):
4077 self._ExpandAndLockInstance()
# Node locks are computed from the instance's nodes once the instance
# lock is held.
4078 self.needed_locks[locking.LEVEL_NODE] = []
4079 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4081 def DeclareLocks(self, level):
4082 if level == locking.LEVEL_NODE:
4083 self._LockInstancesNodes()
4085 def CheckPrereq(self):
4086 """Check prerequisites.
4088 This checks that the instance is in the cluster.
4091 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4092 assert self.instance is not None, \
4093 "Cannot retrieve locked instance %s" % self.op.instance_name
4094 _CheckNodeOnline(self, self.instance.primary_node)
4096 def Exec(self, feedback_fn):
4097 """Activate the disks.
4100 disks_ok, disks_info = \
4101 _AssembleInstanceDisks(self, self.instance,
4102 ignore_size=self.op.ignore_size)
4104 raise errors.OpExecError("Cannot activate block devices")
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
4109 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4111 """Prepare the block devices for an instance.
4113 This sets up the block devices on all nodes.
4115 @type lu: L{LogicalUnit}
4116 @param lu: the logical unit on whose behalf we execute
4117 @type instance: L{objects.Instance}
4118 @param instance: the instance for whose disks we assemble
4119 @type disks: list of L{objects.Disk} or None
4120 @param disks: which disks to assemble (or all, if None)
4121 @type ignore_secondaries: boolean
4122 @param ignore_secondaries: if true, errors on secondary nodes
4123 won't result in an error return from the function
4124 @type ignore_size: boolean
4125 @param ignore_size: if true, the current known size of the disk
4126 will not be used during the disk activation, useful for cases
4127 when the size is wrong
4128 @return: False if the operation failed, otherwise a list of
4129 (host, instance_visible_name, node_visible_name)
4130 with the mapping from node devices to instance devices
4135 iname = instance.name
4136 disks = _ExpandCheckDisks(instance, disks)
4138 # With the two passes mechanism we try to reduce the window of
4139 # opportunity for the race condition of switching DRBD to primary
4140 # before handshaking occured, but we do not eliminate it
4142 # The proper fix would be to wait (with some limits) until the
4143 # connection has been made and drbd transitions from WFConnection
4144 # into any other network-connected state (Connected, SyncTarget,
4147 # 1st pass, assemble on all nodes in secondary mode
4148 for inst_disk in disks:
4149 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# A copy of the disk with size unset is used when ignore_size is
# requested (the guarding `if` appears elided).
4151 node_disk = node_disk.Copy()
4152 node_disk.UnsetSize()
4153 lu.cfg.SetDiskID(node_disk, node)
4154 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4155 msg = result.fail_msg
4157 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4158 " (is_primary=False, pass=1): %s",
4159 inst_disk.iv_name, node, msg)
4160 if not ignore_secondaries:
4163 # FIXME: race condition on drbd migration to primary
4165 # 2nd pass, do only the primary node
4166 for inst_disk in disks:
4169 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# Second pass skips every node except the instance's primary.
4170 if node != instance.primary_node:
4173 node_disk = node_disk.Copy()
4174 node_disk.UnsetSize()
4175 lu.cfg.SetDiskID(node_disk, node)
4176 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4177 msg = result.fail_msg
4179 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4180 " (is_primary=True, pass=2): %s",
4181 inst_disk.iv_name, node, msg)
4184 dev_path = result.payload
4186 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4188 # leave the disks configured for the primary node
4189 # this is a workaround that would be fixed better by
4190 # improving the logical/physical id handling
4192 lu.cfg.SetDiskID(disk, instance.primary_node)
4194 return disks_ok, device_info
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# Assemble an instance's disks; on failure (guarding `if` appears elided)
# shut them back down and abort with an OpExecError.
4197 def _StartInstanceDisks(lu, instance, force):
4198 """Start the disks of an instance.
4201 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4202 ignore_secondaries=force)
4204 _ShutdownInstanceDisks(lu, instance)
# Hint the user about --force only when force was given but falsy.
4205 if force is not None and not force:
4206 lu.proc.LogWarning("", hint="If the message above refers to a"
4208 " you can retry the operation using '--force'.")
4209 raise errors.OpExecError("Disk consistency error")
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
# LU that shuts down an instance's block devices (via the "safe" helper,
# which refuses to act while the instance is running).
4212 class LUDeactivateInstanceDisks(NoHooksLU):
4213 """Shutdown an instance's disks.
4216 _OP_REQP = [("instance_name", _TNonEmptyString)]
4219 def ExpandNames(self):
4220 self._ExpandAndLockInstance()
4221 self.needed_locks[locking.LEVEL_NODE] = []
4222 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4224 def DeclareLocks(self, level):
4225 if level == locking.LEVEL_NODE:
4226 self._LockInstancesNodes()
4228 def CheckPrereq(self):
4229 """Check prerequisites.
4231 This checks that the instance is in the cluster.
4234 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4235 assert self.instance is not None, \
4236 "Cannot retrieve locked instance %s" % self.op.instance_name
4238 def Exec(self, feedback_fn):
4239 """Deactivate the disks
4242 instance = self.instance
4243 _SafeShutdownInstanceDisks(self, instance)
# NOTE(review): numbered excerpt; docstring closing lines appear elided.
4246 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4247 """Shutdown block devices of an instance.
4249 This function checks if an instance is running, before calling
4250 _ShutdownInstanceDisks.
# _CheckInstanceDown raises if the instance is still running, so the
# actual shutdown below only happens for a stopped instance.
4253 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4254 _ShutdownInstanceDisks(lu, instance, disks=disks)
# NOTE(review): numbered excerpt with intermediate lines elided (the
# None-check branch and the tail of the error message are not visible).
4257 def _ExpandCheckDisks(instance, disks):
4258 """Return the instance disks selected by the disks list
4260 @type disks: list of L{objects.Disk} or None
4261 @param disks: selected disks
4262 @rtype: list of L{objects.Disk}
4263 @return: selected instance disks to act on
# When no subset is given, all of the instance's disks are returned.
4267 return instance.disks
# An explicit selection must be a subset of the instance's own disks;
# anything else is a programmer error, not a user error.
4269 if not set(disks).issubset(instance.disks):
4270 raise errors.ProgrammerError("Can only act on disks belonging to the"
# NOTE(review): numbered excerpt with intermediate lines elided; comments
# describe only what the visible lines show.
4275 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4276 """Shutdown block devices of an instance.
4278 This does the shutdown on all nodes of the instance.
4280 If the ignore_primary is false, errors on the primary node are
4285 disks = _ExpandCheckDisks(instance, disks)
# Walk every (node, disk) pair in each disk's node tree and ask that node
# to shut the device down; failures are warnings, and only count as
# errors when they hit the primary node (unless ignore_primary).
4288 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4289 lu.cfg.SetDiskID(top_disk, node)
4290 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4291 msg = result.fail_msg
4293 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4294 disk.iv_name, node, msg)
4295 if not ignore_primary or node != instance.primary_node:
# NOTE(review): numbered excerpt; the final error-code argument of the last
# raise appears elided.
4300 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4301 """Checks if a node has enough free memory.
4303 This function check if a given node has the needed amount of free
4304 memory. In case the node has less memory or we cannot get the
4305 information from the node, this function raise an OpPrereqError
4308 @type lu: C{LogicalUnit}
4309 @param lu: a logical unit from which we get configuration data
4311 @param node: the node to check
4312 @type reason: C{str}
4313 @param reason: string to use in the error message
4314 @type requested: C{int}
4315 @param requested: the amount of memory in MiB to check for
4316 @type hypervisor_name: C{str}
4317 @param hypervisor_name: the hypervisor to ask for memory stats
4318 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4319 we cannot check the node
4322 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
# RPC failure is turned into a prerequisite (environment) error.
4323 nodeinfo[node].Raise("Can't get data from node %s" % node,
4324 prereq=True, ecode=errors.ECODE_ENVIRON)
# A non-int payload means the node couldn't report memory reliably.
4325 free_mem = nodeinfo[node].payload.get('memory_free', None)
4326 if not isinstance(free_mem, int):
4327 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4328 " was '%s'" % (node, free_mem),
4329 errors.ECODE_ENVIRON)
4330 if requested > free_mem:
4331 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4332 " needed %s MiB, available %s MiB" %
4333 (node, reason, requested, free_mem),
# NOTE(review): numbered excerpt; the final error-code argument of the last
# raise appears elided. Mirrors _CheckNodeFreeMemory, but for VG free space
# across a list of nodes.
4337 def _CheckNodesFreeDisk(lu, nodenames, requested):
4338 """Checks if nodes have enough free disk space in the default VG.
4340 This function check if all given nodes have the needed amount of
4341 free disk. In case any node has less disk or we cannot get the
4342 information from the node, this function raise an OpPrereqError
4345 @type lu: C{LogicalUnit}
4346 @param lu: a logical unit from which we get configuration data
4347 @type nodenames: C{list}
4348 @param nodenames: the list of node names to check
4349 @type requested: C{int}
4350 @param requested: the amount of disk in MiB to check for
4351 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4352 we cannot check the node
4355 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4356 lu.cfg.GetHypervisorType())
4357 for node in nodenames:
4358 info = nodeinfo[node]
4359 info.Raise("Cannot get current information from node %s" % node,
4360 prereq=True, ecode=errors.ECODE_ENVIRON)
4361 vg_free = info.payload.get("vg_free", None)
# vg_free must be a real integer; anything else is an environment error.
4362 if not isinstance(vg_free, int):
4363 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4364 " result was '%s'" % (node, vg_free),
4365 errors.ECODE_ENVIRON)
4366 if requested > vg_free:
4367 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4368 " required %d MiB, available %d MiB" %
4369 (node, requested, vg_free),
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): the "_OP_REQP = [" / "_OP_DEFS = [" header lines appear to
  # be missing from this copy of the source; the tuples below are the
  # required/default opcode parameter declarations — confirm against upstream.
  ("instance_name", _TNonEmptyString),
  ("beparams", _TDict),
  ("hvparams", _TDict),
  ("beparams", _EmptyDict),
  ("hvparams", _EmptyDict),

  def CheckArguments(self):
    # Validate types of any caller-supplied backend parameters early
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the "env = {" opener seems to be missing here — verify
    "FORCE": self.op.force,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      # overlay the overrides on the cluster+instance defaults before checking
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    # only verify free memory if the instance is not already running
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    # mark up first so watchers won't shut it down while we start it
    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.op.hvparams, self.op.beparams)
    msg = result.fail_msg
    # NOTE(review): the "if msg:" guard appears to be missing in this copy;
    # the cleanup below should only run on failure — confirm against upstream
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): the "_OP_REQP = [" header line appears to be missing here
  ("instance_name", _TNonEmptyString),
  ("ignore_secondaries", _TBool),
  ("reboot_type", _TElemOf(constants.REBOOT_TYPES)),
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the "env = {" opener seems to be missing here — verify
    "IGNORE_SECONDARIES": self.op.ignore_secondaries,
    "REBOOT_TYPE": self.op.reboot_type,
    "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    # soft/hard reboots are delegated to the hypervisor on the node;
    # everything else falls through to a full shutdown/start cycle
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
      # NOTE(review): an "else:" separating the full-reboot branch appears to
      # be missing in this copy — confirm against upstream
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      # presumably guarded by a missing "if msg:" — cleanup on failed start
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [("instance_name", _TNonEmptyString)]
  _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout
    # mark the instance down in config before issuing the RPC
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    # shutdown failure is only warned about (best effort), not raised;
    # NOTE(review): the "if msg:" guard appears to be missing in this copy
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = [("instance_name", _TNonEmptyString)]
  # NOTE(review): the "_OP_DEFS = [" header line appears to be missing here
  ("force_variant", False),

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    # a diskless instance has no OS to reinstall
    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
    _CheckInstanceDown(self, instance, "cannot reinstall")

    # if a new OS was requested, make sure the primary node offers it
    if self.op.os_type is not None:
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # persist the OS change before running the create scripts
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    # NOTE(review): a "try:" / "finally:" pair around the OS-create call
    # appears to be missing in this copy — confirm against upstream
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
      _ShutdownInstanceDisks(self, inst)
class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): the "_OP_REQP = [" header line appears to be missing here
  ("instance_name", _TNonEmptyString),
  ("disks", _TListOf(_TPositiveInt)),

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    # empty disk list means "all disks"; otherwise validate the indices
    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
      # NOTE(review): an "else:" separating the validation branch appears to
      # be missing in this copy — confirm against upstream
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    # build the skip list: disks NOT named in op.disks are left untouched
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in

    _CreateDisks(self, self.instance, to_skip=to_skip)
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): the "_OP_REQP = [" header line appears to be missing here
  ("instance_name", _TNonEmptyString),
  ("new_name", _TNonEmptyString),
  _OP_DEFS = [("ignore_ip", False)]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    # unless explicitly ignored, make sure the new name's IP is unused
    if not self.op.ignore_ip:
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    # file-based disks live in a directory named after the instance
    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    # NOTE(review): a "try:" / "finally:" pair around the rename-script call
    # appears to be missing in this copy — confirm against upstream
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      # a failed rename script is a warning only — Ganeti config has already
      # been updated at this point
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
      _ShutdownInstanceDisks(self, inst)
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): the "_OP_REQP = [" header line appears to be missing here
  ("instance_name", _TNonEmptyString),
  ("ignore_failures", _TBool),
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    # post-hooks also run on the (by then removed) instance's nodes
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    # NOTE(review): "if msg:" / "else:" guard lines appear to be missing in
    # this copy — a failed shutdown is fatal unless ignore_failures is set
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  Removes the instance's block devices, drops it from the cluster
  configuration and schedules the removal of its lock.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    # disk removal failed: either warn and continue, or abort
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  # NOTE(review): the "_OP_REQP = [" header line appears to be missing here
  ("output_fields", _TListOf(_TNonEmptyString)),
  ("names", _TListOf(_TNonEmptyString)),
  ("use_locking", _TBool),

  # fields read directly off the instance object via getattr()
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  # NOTE(review): the list-comprehension head lines building the "hv/..." and
  # "be/..." field names seem to be missing in this copy — confirm upstream
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    ] + _SIMPLE_FIELDS +
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                   for name in constants.BES_PARAMETERS])
  # fields that require a live RPC query to the nodes
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    # queries only need shared locks
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1
    # NOTE(review): the "if self.op.names:" / "else:" guard around the two
    # assignments below appears to be missing in this copy
      self.wanted = _GetWantedInstances(self, self.op.names)
      self.wanted = locking.ALL_SET

    # node queries (and locking) are only needed for dynamic fields
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    # presumably guarded by a missing "if self.do_locking:" — verify
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      # (the "if self.do_locking:" / "else:" guard appears missing here)
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
      # caller did specify names, so we must keep the ordering
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    # live data is only gathered when dynamic fields were requested
    if self.do_node_query:
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      # (the "for name in nodes:" loop header appears missing here)
        result = node_data[name]
          # offline nodes will be in both lists
          off_nodes.append(name)
          bad_nodes.append(name)
          live_data.update(result.payload)
        # else no instance is alive
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    # NOTE(review): HVPREFIX/BEPREFIX definitions and the "output" list
    # initialisation appear to be on lines missing from this copy
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      # per-instance effective hypervisor/backend/NIC parameters
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
            running = bool(live_data.get(instance.name))
            # (branch bodies computing the running/admin_up status strings
            # appear to be on lines missing from this copy)
            if instance.admin_up:
            if instance.admin_up:
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
          # (the 'elif field == "ip":' guard appears missing here)
          val = instance.nics[0].ip
        elif field == "nic_mode":
          val = i_nicp[0][constants.NIC_MODE]
        elif field == "nic_link":
          val = i_nicp[0][constants.NIC_LINK]
        elif field == "bridge":
          # bridge is only meaningful for bridged NICs (link == bridge name)
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
        elif field == "mac":
          val = instance.nics[0].mac
        elif field == "sda_size" or field == "sdb_size":
          # map 'a'/'b' in the legacy field name to disk index 0/1
          idx = ord(field[2]) - ord('a')
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
              # unreachable by construction of the regexps above
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              # only bridged NICs contribute their link as a bridge
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
              # index-addressed NIC fields, e.g. nic.mac/0
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
              if st_groups[1] == "mac":
                val = instance.nics[nic_idx].mac
              elif st_groups[1] == "ip":
                val = instance.nics[nic_idx].ip
              elif st_groups[1] == "mode":
                val = i_nicp[nic_idx][constants.NIC_MODE]
              elif st_groups[1] == "link":
                val = i_nicp[nic_idx][constants.NIC_LINK]
              elif st_groups[1] == "bridge":
                nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                if nic_mode == constants.NIC_MODE_BRIDGED:
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                assert False, "Unhandled NIC parameter"
            assert False, ("Declared but unhandled variable parameter '%s'" %
          assert False, "Declared but unhandled parameter '%s'" % field
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): the "_OP_REQP = [" header line appears to be missing here
  ("instance_name", _TNonEmptyString),
  ("ignore_consistency", _TBool),
  _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    # NOTE(review): the "env = {" opener seems to be missing here — verify
    "IGNORE_CONSISTENCY": self.op.ignore_consistency,
    "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
    "OLD_PRIMARY": source_node,
    "OLD_SECONDARY": target_node,
    "NEW_PRIMARY": target_node,
    "NEW_SECONDARY": source_node,
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    # post hooks also run on the old primary
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # failover requires a network-mirrored (e.g. DRBD) disk template
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
      # NOTE(review): an "else:" guard appears to be missing before this call
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
      # (an "else:" guard appears to be missing before this call)
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    # ("if msg:" / "else:" guard lines appear to be missing here)
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      # (presumably guarded by a missing "if not disks_ok:" — verify)
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      # (presumably guarded by a missing "if msg:" — verify)
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): the "_OP_REQP = [" header and the ("live", ...) entry
  # appear to be missing from this copy of the source
  ("instance_name", _TNonEmptyString),
  ("cleanup", _TBool),

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # the actual work is delegated to a tasklet shared with node evacuation
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    # NOTE(review): the "env.update({" opener seems to be missing here
    "OLD_PRIMARY": source_node,
    "OLD_SECONDARY": target_node,
    "NEW_PRIMARY": target_node,
    "NEW_SECONDARY": source_node,
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    # post hooks also run on the old primary
    nl_post.append(source_node)
    return env, nl, nl_post
5409 class LUMoveInstance(LogicalUnit):
5410 """Move an instance by data-copying.
5413 HPATH = "instance-move"
5414 HTYPE = constants.HTYPE_INSTANCE
5416 ("instance_name", _TNonEmptyString),
5417 ("target_node", _TNonEmptyString),
5419 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5422 def ExpandNames(self):
5423 self._ExpandAndLockInstance()
5424 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5425 self.op.target_node = target_node
5426 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5427 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5429 def DeclareLocks(self, level):
5430 if level == locking.LEVEL_NODE:
5431 self._LockInstancesNodes(primary_only=True)
5433 def BuildHooksEnv(self):
5436 This runs on master, primary and secondary nodes of the instance.
5440 "TARGET_NODE": self.op.target_node,
5441 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5443 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5444 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5445 self.op.target_node]
5448 def CheckPrereq(self):
5449 """Check prerequisites.
5451 This checks that the instance is in the cluster.
5454 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5455 assert self.instance is not None, \
5456 "Cannot retrieve locked instance %s" % self.op.instance_name
5458 node = self.cfg.GetNodeInfo(self.op.target_node)
5459 assert node is not None, \
5460 "Cannot retrieve locked node %s" % self.op.target_node
5462 self.target_node = target_node = node.name
5464 if target_node == instance.primary_node:
5465 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5466 (instance.name, target_node),
5469 bep = self.cfg.GetClusterInfo().FillBE(instance)
5471 for idx, dsk in enumerate(instance.disks):
5472 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5473 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5474 " cannot copy" % idx, errors.ECODE_STATE)
5476 _CheckNodeOnline(self, target_node)
5477 _CheckNodeNotDrained(self, target_node)
5479 if instance.admin_up:
5480 # check memory requirements on the secondary node
5481 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5482 instance.name, bep[constants.BE_MEMORY],
5483 instance.hypervisor)
5485 self.LogInfo("Not checking memory on the secondary node as"
5486 " instance will not be started")
5488 # check bridge existance
5489 _CheckInstanceBridgesExist(self, instance, node=target_node)
5491 def Exec(self, feedback_fn):
5492 """Move an instance.
5494 The move is done by shutting it down on its present node, copying
5495 the data over (slow) and starting it on the new node.
# NOTE(review): this listing is missing several structural lines in this
# method ("if msg:", "else:", "try:", "errs = []", and parts of the
# blockdev_export argument list) - confirm against the complete source.
5498 instance = self.instance
5500 source_node = instance.primary_node
5501 target_node = self.target_node
5503 self.LogInfo("Shutting down instance %s on source node %s",
5504 instance.name, source_node)
5506 result = self.rpc.call_instance_shutdown(source_node, instance,
5507 self.op.shutdown_timeout)
5508 msg = result.fail_msg
# a failed shutdown is only tolerated with ignore_consistency set
5510 if self.op.ignore_consistency:
5511 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5512 " Proceeding anyway. Please make sure node"
5513 " %s is down. Error details: %s",
5514 instance.name, source_node, source_node, msg)
5516 raise errors.OpExecError("Could not shutdown instance %s on"
5518 (instance.name, source_node, msg))
5520 # create the target disks
5522 _CreateDisks(self, instance, target_node=target_node)
5523 except errors.OpExecError:
# on creation failure, clean up whatever was created on the target and
# release any DRBD minors that were reserved during creation
5524 self.LogWarning("Device creation failed, reverting...")
5526 _RemoveDisks(self, instance, target_node=target_node)
5528 self.cfg.ReleaseDRBDMinors(instance.name)
5531 cluster_name = self.cfg.GetClusterInfo().cluster_name
5534 # activate, get path, copy the data over
5535 for idx, disk in enumerate(instance.disks):
5536 self.LogInfo("Copying data for disk %d", idx)
5537 result = self.rpc.call_blockdev_assemble(target_node, disk,
5538 instance.name, True)
5540 self.LogWarning("Can't assemble newly created disk %d: %s",
5541 idx, result.fail_msg)
5542 errs.append(result.fail_msg)
5544 dev_path = result.payload
5545 result = self.rpc.call_blockdev_export(source_node, disk,
5546 target_node, dev_path,
5549 self.LogWarning("Can't copy data over for disk %d: %s",
5550 idx, result.fail_msg)
5551 errs.append(result.fail_msg)
# any accumulated copy error aborts the move and reverts the target disks
5555 self.LogWarning("Some disks failed to copy, aborting")
5557 _RemoveDisks(self, instance, target_node=target_node)
5559 self.cfg.ReleaseDRBDMinors(instance.name)
5560 raise errors.OpExecError("Errors during disk copy: %s" %
# commit: the instance now lives on the target node
5563 instance.primary_node = target_node
5564 self.cfg.Update(instance, feedback_fn)
5566 self.LogInfo("Removing the disks on the original node")
5567 _RemoveDisks(self, instance, target_node=source_node)
5569 # Only start the instance if it's marked as up
5570 if instance.admin_up:
5571 self.LogInfo("Starting instance %s on node %s",
5572 instance.name, target_node)
5574 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5575 ignore_secondaries=True)
5577 _ShutdownInstanceDisks(self, instance)
5578 raise errors.OpExecError("Can't activate the instance's disks")
5580 result = self.rpc.call_instance_start(target_node, instance, None, None)
5581 msg = result.fail_msg
5583 _ShutdownInstanceDisks(self, instance)
5584 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5585 (instance.name, target_node, msg))
5588 class LUMigrateNode(LogicalUnit):
5589 """Migrate all instances from a node.
# Creates one TLMigrateInstance tasklet per primary instance on the node.
# NOTE(review): this listing is missing original lines in this class
# (opcode parameter entries, "names = []"/"tasklets = []", the closing
# brace of needed_locks, and the env literal) - confirm before reuse.
5592 HPATH = "node-migrate"
5593 HTYPE = constants.HTYPE_NODE
5595 ("node_name", _TNonEmptyString),
5600 def ExpandNames(self):
5601 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5603 self.needed_locks = {
5604 locking.LEVEL_NODE: [self.op.node_name],
5607 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5609 # Create tasklets for migrating instances for all instances on this node
5613 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5614 logging.debug("Migrating instance %s", inst.name)
5615 names.append(inst.name)
5617 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5619 self.tasklets = tasklets
5621 # Declare instance locks
5622 self.needed_locks[locking.LEVEL_INSTANCE] = names
5624 def DeclareLocks(self, level):
5625 if level == locking.LEVEL_NODE:
5626 self._LockInstancesNodes()
5628 def BuildHooksEnv(self):
5631 This runs on the master, the primary and all the secondaries.
5635 "NODE_NAME": self.op.node_name,
5638 nl = [self.cfg.GetMasterNode()]
5640 return (env, nl, nl)
5643 class TLMigrateInstance(Tasklet):
# Tasklet implementing live/non-live migration of a DRBD8 instance to its
# secondary node, plus post-failure cleanup.
# NOTE(review): this listing is missing many original lines throughout the
# class (docstring delimiters, "self.live = live", "if msg:"/"else:"/
# "try:" lines, and several argument-continuation lines) - confirm any
# change against the complete source.
5644 def __init__(self, lu, instance_name, live, cleanup):
5645 """Initializes this class.
5648 Tasklet.__init__(self, lu)
5651 self.instance_name = instance_name
5653 self.cleanup = cleanup
# Verify the instance uses DRBD8, has a secondary, and (for a real
# migration) that the hypervisor reports it as migratable.
5655 def CheckPrereq(self):
5656 """Check prerequisites.
5658 This checks that the instance is in the cluster.
5661 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5662 instance = self.cfg.GetInstanceInfo(instance_name)
5663 assert instance is not None
5665 if instance.disk_template != constants.DT_DRBD8:
5666 raise errors.OpPrereqError("Instance's disk layout is not"
5667 " drbd8, cannot migrate.", errors.ECODE_STATE)
5669 secondary_nodes = instance.secondary_nodes
5670 if not secondary_nodes:
5671 raise errors.ConfigurationError("No secondary node but using"
5672 " drbd8 disk template")
5674 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5676 target_node = secondary_nodes[0]
5677 # check memory requirements on the secondary node
5678 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5679 instance.name, i_be[constants.BE_MEMORY],
5680 instance.hypervisor)
5682 # check bridge existance
5683 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5685 if not self.cleanup:
5686 _CheckNodeNotDrained(self.lu, target_node)
5687 result = self.rpc.call_instance_migratable(instance.primary_node,
5689 result.Raise("Can't migrate, please use failover",
5690 prereq=True, ecode=errors.ECODE_STATE)
5692 self.instance = instance
# Poll both nodes until DRBD reports the disks fully synchronized.
5694 def _WaitUntilSync(self):
5695 """Poll with custom rpc for disk sync.
5697 This uses our own step-based rpc call.
5700 self.feedback_fn("* wait until resync is done")
5704 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5706 self.instance.disks)
5708 for node, nres in result.items():
5709 nres.Raise("Cannot resync disks on node %s" % node)
5710 node_done, node_percent = nres.payload
5711 all_done = all_done and node_done
5712 if node_percent is not None:
5713 min_percent = min(min_percent, node_percent)
5715 if min_percent < 100:
5716 self.feedback_fn(" - progress: %.1f%%" % min_percent)
# Close the instance's block devices on the given node, demoting it.
5719 def _EnsureSecondary(self, node):
5720 """Demote a node to secondary.
5723 self.feedback_fn("* switching node %s to secondary mode" % node)
5725 for dev in self.instance.disks:
5726 self.cfg.SetDiskID(dev, node)
5728 result = self.rpc.call_blockdev_close(node, self.instance.name,
5729 self.instance.disks)
5730 result.Raise("Cannot change disk to secondary on node %s" % node)
# Detach DRBD from the network on both nodes (standalone mode).
5732 def _GoStandalone(self):
5733 """Disconnect from the network.
5736 self.feedback_fn("* changing into standalone mode")
5737 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5738 self.instance.disks)
5739 for node, nres in result.items():
5740 nres.Raise("Cannot disconnect disks node %s" % node)
# Re-attach DRBD to the network, in single- or dual-master mode.
5742 def _GoReconnect(self, multimaster):
5743 """Reconnect to the network.
5749 msg = "single-master"
5750 self.feedback_fn("* changing disks into %s mode" % msg)
5751 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5752 self.instance.disks,
5753 self.instance.name, multimaster)
5754 for node, nres in result.items():
5755 nres.Raise("Cannot change disks config on node %s" % node)
5757 def _ExecCleanup(self):
5758 """Try to cleanup after a failed migration.
5760 The cleanup is done by:
5761 - check that the instance is running only on one node
5762 (and update the config if needed)
5763 - change disks on its secondary node to secondary
5764 - wait until disks are fully synchronized
5765 - disconnect from the network
5766 - change disks into single-master mode
5767 - wait again until disks are fully synchronized
5770 instance = self.instance
5771 target_node = self.target_node
5772 source_node = self.source_node
5774 # check running on only one node
5775 self.feedback_fn("* checking where the instance actually runs"
5776 " (if this hangs, the hypervisor might be in"
5778 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5779 for node, result in ins_l.items():
5780 result.Raise("Can't contact node %s" % node)
5782 runningon_source = instance.name in ins_l[source_node].payload
5783 runningon_target = instance.name in ins_l[target_node].payload
5785 if runningon_source and runningon_target:
5786 raise errors.OpExecError("Instance seems to be running on two nodes,"
5787 " or the hypervisor is confused. You will have"
5788 " to ensure manually that it runs only on one"
5789 " and restart this operation.")
5791 if not (runningon_source or runningon_target):
5792 raise errors.OpExecError("Instance does not seem to be running at all."
5793 " In this case, it's safer to repair by"
5794 " running 'gnt-instance stop' to ensure disk"
5795 " shutdown, and then restarting it.")
5797 if runningon_target:
5798 # the migration has actually succeeded, we need to update the config
5799 self.feedback_fn("* instance running on secondary node (%s),"
5800 " updating config" % target_node)
5801 instance.primary_node = target_node
5802 self.cfg.Update(instance, self.feedback_fn)
5803 demoted_node = source_node
5805 self.feedback_fn("* instance confirmed to be running on its"
5806 " primary node (%s)" % source_node)
5807 demoted_node = target_node
# demote the node not running the instance, then resync best-effort
5809 self._EnsureSecondary(demoted_node)
5811 self._WaitUntilSync()
5812 except errors.OpExecError:
5813 # we ignore here errors, since if the device is standalone, it
5814 # won't be able to sync
5816 self._GoStandalone()
5817 self._GoReconnect(False)
5818 self._WaitUntilSync()
5820 self.feedback_fn("* done")
5822 def _RevertDiskStatus(self):
5823 """Try to revert the disk status after a failed migration.
5826 target_node = self.target_node
5828 self._EnsureSecondary(target_node)
5829 self._GoStandalone()
5830 self._GoReconnect(False)
5831 self._WaitUntilSync()
5832 except errors.OpExecError, err:
# best effort: warn the operator rather than mask the original failure
5833 self.lu.LogWarning("Migration failed and I can't reconnect the"
5834 " drives: error '%s'\n"
5835 "Please look and recover the instance status" %
5838 def _AbortMigration(self):
5839 """Call the hypervisor code to abort a started migration.
5842 instance = self.instance
5843 target_node = self.target_node
5844 migration_info = self.migration_info
5846 abort_result = self.rpc.call_finalize_migration(target_node,
5850 abort_msg = abort_result.fail_msg
5852 logging.error("Aborting migration failed on target node %s: %s",
5853 target_node, abort_msg)
5854 # Don't raise an exception here, as we stil have to try to revert the
5855 # disk status, even if this step failed.
5857 def _ExecMigration(self):
5858 """Migrate an instance.
5860 The migrate is done by:
5861 - change the disks into dual-master mode
5862 - wait until disks are fully synchronized again
5863 - migrate the instance
5864 - change disks on the new secondary node (the old primary) to secondary
5865 - wait until disks are fully synchronized
5866 - change disks into single-master mode
5869 instance = self.instance
5870 target_node = self.target_node
5871 source_node = self.source_node
5873 self.feedback_fn("* checking disk consistency between source and target")
5874 for dev in instance.disks:
5875 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5876 raise errors.OpExecError("Disk %s is degraded or not fully"
5877 " synchronized on target node,"
5878 " aborting migrate." % dev.iv_name)
5880 # First get the migration information from the remote node
5881 result = self.rpc.call_migration_info(source_node, instance)
5882 msg = result.fail_msg
5884 log_err = ("Failed fetching source migration information from %s: %s" %
5886 logging.error(log_err)
5887 raise errors.OpExecError(log_err)
5889 self.migration_info = migration_info = result.payload
5891 # Then switch the disks to master/master mode
5892 self._EnsureSecondary(target_node)
5893 self._GoStandalone()
5894 self._GoReconnect(True)
5895 self._WaitUntilSync()
5897 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5898 result = self.rpc.call_accept_instance(target_node,
5901 self.nodes_ip[target_node])
5903 msg = result.fail_msg
# a failed pre-migration is aborted and the disk state reverted
5905 logging.error("Instance pre-migration failed, trying to revert"
5906 " disk status: %s", msg)
5907 self.feedback_fn("Pre-migration failed, aborting")
5908 self._AbortMigration()
5909 self._RevertDiskStatus()
5910 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5911 (instance.name, msg))
5913 self.feedback_fn("* migrating instance to %s" % target_node)
5915 result = self.rpc.call_instance_migrate(source_node, instance,
5916 self.nodes_ip[target_node],
5918 msg = result.fail_msg
5920 logging.error("Instance migration failed, trying to revert"
5921 " disk status: %s", msg)
5922 self.feedback_fn("Migration failed, aborting")
5923 self._AbortMigration()
5924 self._RevertDiskStatus()
5925 raise errors.OpExecError("Could not migrate instance %s: %s" %
5926 (instance.name, msg))
# commit: the target node is now the primary
5929 instance.primary_node = target_node
5930 # distribute new instance config to the other nodes
5931 self.cfg.Update(instance, self.feedback_fn)
5933 result = self.rpc.call_finalize_migration(target_node,
5937 msg = result.fail_msg
5939 logging.error("Instance migration succeeded, but finalization failed:"
5941 raise errors.OpExecError("Could not finalize instance migration: %s" %
# demote the old primary and go back to single-master mode
5944 self._EnsureSecondary(source_node)
5945 self._WaitUntilSync()
5946 self._GoStandalone()
5947 self._GoReconnect(False)
5948 self._WaitUntilSync()
5950 self.feedback_fn("* done")
5952 def Exec(self, feedback_fn):
5953 """Perform the migration.
5956 feedback_fn("Migrating instance %s" % self.instance.name)
5958 self.feedback_fn = feedback_fn
5960 self.source_node = self.instance.primary_node
5961 self.target_node = self.instance.secondary_nodes[0]
5962 self.all_nodes = [self.source_node, self.target_node]
# migration traffic goes over the secondary network
5964 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5965 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5969 return self._ExecCleanup()
5971 return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  NOTE(review): the listing of this function was syntactically
  incomplete (the signature continuation and several body lines were
  dropped); the body below is reconstructed - confirm against the
  complete source.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be change to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    # from here down the whole subtree must be created everywhere
    force_create = True

  # create children first, bottom-up
  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6016 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6017 """Create a single block device on a given node.
6019 This will not recurse over children of the device, so they must be
6022 @param lu: the lu on whose behalf we execute
6023 @param node: the node on which to create the device
6024 @type instance: L{objects.Instance}
6025 @param instance: the instance which owns the device
6026 @type device: L{objects.Disk}
6027 @param device: the device to create
6028 @param info: the extra 'metadata' we should attach to the device
6029 (this will be represented as a LVM tag)
6030 @type force_open: boolean
6031 @param force_open: this parameter will be passes to the
6032 L{backend.BlockdevCreate} function where it specifies
6033 whether we run on primary or not, and it affects both
6034 the child assembly and the device own Open() execution
6037 lu.cfg.SetDiskID(device, node)
6038 result = lu.rpc.call_blockdev_create(node, device, device.size,
6039 instance.name, force_open, info)
6040 result.Raise("Can't create block device %s on"
6041 " node %s for instance %s" % (device, node, instance.name))
6042 if device.physical_id is None:
6043 device.physical_id = result.payload
6046 def _GenerateUniqueNames(lu, exts):
6047 """Generate a suitable LV name.
6049 This will generate a logical volume name for the given instance.
6054 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6055 results.append("%s%s" % (new_id, val))
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  NOTE(review): the listing dropped the minor/shared-secret argument
  lines and the return; reconstructed - confirm against the complete
  source.

  @param lu: the lu on whose behalf we execute
  @param primary: the primary node name
  @param secondary: the secondary node name
  @param size: size of the data volume, in MiB
  @param names: pair of LV names, (data, meta)
  @param iv_name: the instance-visible name ("disk/N")
  @param p_minor: DRBD minor on the primary node
  @param s_minor: DRBD minor on the secondary node
  @return: the L{objects.Disk} describing the DRBD8 device

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  # the metadata volume has a fixed size of 128 MiB
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
6080 def _GenerateDiskTemplate(lu, template_name,
6081 instance_name, primary_node,
6082 secondary_nodes, disk_info,
6083 file_storage_dir, file_driver,
# Builds the list of objects.Disk for the requested disk template.
# NOTE(review): this listing is missing original lines in this function
# ("base_index" parameter continuation, "disks = []", "names = []", the
# "else:" before the final ProgrammerError and the "return disks") -
# confirm against the complete source.
6085 """Generate the entire disk layout for a given template type.
6088 #TODO: compute space requirements
6090 vgname = lu.cfg.GetVGName()
6091 disk_count = len(disk_info)
6093 if template_name == constants.DT_DISKLESS:
6095 elif template_name == constants.DT_PLAIN:
# plain LVs: no secondaries allowed, one LV per disk
6096 if len(secondary_nodes) != 0:
6097 raise errors.ProgrammerError("Wrong template configuration")
6099 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6100 for i in range(disk_count)])
6101 for idx, disk in enumerate(disk_info):
6102 disk_index = idx + base_index
6103 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6104 logical_id=(vgname, names[idx]),
6105 iv_name="disk/%d" % disk_index,
6107 disks.append(disk_dev)
6108 elif template_name == constants.DT_DRBD8:
# drbd8: exactly one secondary; two minors (primary+secondary) and a
# data/meta LV pair per disk
6109 if len(secondary_nodes) != 1:
6110 raise errors.ProgrammerError("Wrong template configuration")
6111 remote_node = secondary_nodes[0]
6112 minors = lu.cfg.AllocateDRBDMinor(
6113 [primary_node, remote_node] * len(disk_info), instance_name)
6116 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6117 for i in range(disk_count)]):
6118 names.append(lv_prefix + "_data")
6119 names.append(lv_prefix + "_meta")
6120 for idx, disk in enumerate(disk_info):
6121 disk_index = idx + base_index
6122 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6123 disk["size"], names[idx*2:idx*2+2],
6124 "disk/%d" % disk_index,
6125 minors[idx*2], minors[idx*2+1])
6126 disk_dev.mode = disk["mode"]
6127 disks.append(disk_dev)
6128 elif template_name == constants.DT_FILE:
# file-based: no secondaries; requires file storage to be enabled
6129 if len(secondary_nodes) != 0:
6130 raise errors.ProgrammerError("Wrong template configuration")
6132 _RequireFileStorage()
6134 for idx, disk in enumerate(disk_info):
6135 disk_index = idx + base_index
6136 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6137 iv_name="disk/%d" % disk_index,
6138 logical_id=(file_driver,
6139 "%s/disk%d" % (file_storage_dir,
6142 disks.append(disk_dev)
6144 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6148 def _GetInstanceInfoText(instance):
6149 """Compute that text that should be added to the disk's metadata.
6152 return "originstname+%s" % instance.name
6155 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6156 """Create all disks for an instance.
6158 This abstracts away some work from AddInstance.
# NOTE(review): this listing is missing original lines (the "else:"
# branch that sets pnode/all_nodes from target_node, the fail_msg check
# after the storage-dir RPC, and the "continue" under to_skip) - confirm
# against the complete source.
6160 @type lu: L{LogicalUnit}
6161 @param lu: the logical unit on whose behalf we execute
6162 @type instance: L{objects.Instance}
6163 @param instance: the instance whose disks we should create
6165 @param to_skip: list of indices to skip
6166 @type target_node: string
6167 @param target_node: if passed, overrides the target node for creation
6169 @return: the success of the creation
6172 info = _GetInstanceInfoText(instance)
6173 if target_node is None:
6174 pnode = instance.primary_node
6175 all_nodes = instance.all_nodes
# file-based instances need their storage directory created first
6180 if instance.disk_template == constants.DT_FILE:
6181 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6182 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6184 result.Raise("Failed to create directory '%s' on"
6185 " node %s" % (file_storage_dir, pnode))
6187 # Note: this needs to be kept in sync with adding of disks in
6188 # LUSetInstanceParams
6189 for idx, device in enumerate(instance.disks):
6190 if to_skip and idx in to_skip:
6192 logging.info("Creating volume %s for instance %s",
6193 device.iv_name, instance.name)
# only the primary node forces creation/open of the devices
6195 for node in all_nodes:
6196 f_create = node == pnode
6197 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6200 def _RemoveDisks(lu, instance, target_node=None):
6201 """Remove all disks for an instance.
6203 This abstracts away some work from `AddInstance()` and
6204 `RemoveInstance()`. Note that in case some of the devices couldn't
6205 be removed, the removal will continue with the other ones (compare
6206 with `_CreateDisks()`).
# NOTE(review): this listing is missing original lines (the success-flag
# initialization/updates, the "if target_node:"/"else:" lines around
# edata, and the final return) - confirm against the complete source.
6208 @type lu: L{LogicalUnit}
6209 @param lu: the logical unit on whose behalf we execute
6210 @type instance: L{objects.Instance}
6211 @param instance: the instance whose disks we should remove
6212 @type target_node: string
6213 @param target_node: used to override the node on which to remove the disks
6215 @return: the success of the removal
6218 logging.info("Removing block devices for instance %s", instance.name)
6221 for device in instance.disks:
# with a target_node override only that node is touched; otherwise the
# full node tree of the device is walked
6223 edata = [(target_node, device)]
6225 edata = device.ComputeNodeTree(instance.primary_node)
6226 for node, disk in edata:
6227 lu.cfg.SetDiskID(disk, node)
6228 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
# removal failures are logged but do not abort the loop
6230 lu.LogWarning("Could not remove block device %s on node %s,"
6231 " continuing anyway: %s", device.iv_name, node, msg)
6234 if instance.disk_template == constants.DT_FILE:
6235 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6239 tgt = instance.primary_node
6240 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6242 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6243 file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  NOTE(review): the listing dropped the dict literal's opening/closing
  lines; reconstructed - confirm against the complete source.

  @param disk_template: the disk template of the instance
  @param disks: list of disk definitions (dicts with a "size" key, MiB)
  @return: required space in the volume group in MiB, or None for
      templates that do not consume volume-group space
  @raise errors.ProgrammerError: for an unknown disk template

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
6269 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6270 """Hypervisor parameter validation.
6272 This function abstract the hypervisor parameter validation to be
6273 used in both instance create and instance modify.
6275 @type lu: L{LogicalUnit}
6276 @param lu: the logical unit for which we check
6277 @type nodenames: list
6278 @param nodenames: the list of nodes on which we should check
6279 @type hvname: string
6280 @param hvname: the name of the hypervisor we should use
6281 @type hvparams: dict
6282 @param hvparams: the parameters which we need to check
6283 @raise errors.OpPrereqError: if the parameters are not valid
6286 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6289 for node in nodenames:
6293 info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  NOTE(review): the listing dropped the "osparams)" argument line and
  the "osname, node)" LogInfo continuation; reconstructed - confirm
  against the complete source.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the hypervisor we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
6325 class LUCreateInstance(LogicalUnit):
6326 """Create an instance.
# Opcode parameter declarations for instance creation: required entries
# first, then (name, default) pairs for the optional ones.
# NOTE(review): this listing is missing original lines in the parameter
# list (the "_OP_PARAMS = [" opener, several entries such as os_type/
# pnode/snode/src_node/src_path, and the closing bracket) - confirm
# against the complete source. The class itself continues below.
6329 HPATH = "instance-add"
6330 HTYPE = constants.HTYPE_INSTANCE
6332 ("instance_name", _TNonEmptyString),
6333 ("mode", _TElemOf(constants.INSTANCE_CREATE_MODES)),
6335 ("wait_for_sync", _TBool),
6336 ("ip_check", _TBool),
6337 ("disks", _TListOf(_TDict)),
6338 ("nics", _TListOf(_TDict)),
6339 ("hvparams", _TDict),
6340 ("beparams", _TDict),
6341 ("osparams", _TDict),
6344 ("name_check", True),
6345 ("no_install", False),
6347 ("force_variant", False),
6348 ("source_handshake", None),
6349 ("source_x509_ca", None),
6350 ("source_instance_name", None),
6355 ("iallocator", None),
6356 ("hypervisor", None),
6357 ("disk_template", None),
6358 ("identify_defaults", None),
6362 def CheckArguments(self):
# Static, cluster-independent validation of the creation opcode: name
# normalization, nic/disk parameter typing, adopt-vs-create consistency,
# per-mode (create/import/remote-import) requirements.
# NOTE(review): this listing is missing many original lines in this
# method (docstring, ECODE arguments, several "else:"/"try:" lines and
# argument continuations) - confirm against the complete source.
6366 # do not require name_check to ease forward/backward compatibility
6368 if self.op.no_install and self.op.start:
6369 self.LogInfo("No-installation mode selected, disabling startup")
6370 self.op.start = False
6371 # validate/normalize the instance name
6372 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6373 if self.op.ip_check and not self.op.name_check:
6374 # TODO: make the ip check more flexible and not depend on the name check
6375 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6378 # check nics' parameter names
6379 for nic in self.op.nics:
6380 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6382 # check disks. parameter names and consistent adopt/no-adopt strategy
6383 has_adopt = has_no_adopt = False
6384 for disk in self.op.disks:
6385 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
# either every disk is adopted or none; adoption is incompatible with
# iallocator placement and with instance import
6390 if has_adopt and has_no_adopt:
6391 raise errors.OpPrereqError("Either all disks are adopted or none is",
6394 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6395 raise errors.OpPrereqError("Disk adoption is not supported for the"
6396 " '%s' disk template" %
6397 self.op.disk_template,
6399 if self.op.iallocator is not None:
6400 raise errors.OpPrereqError("Disk adoption not allowed with an"
6401 " iallocator script", errors.ECODE_INVAL)
6402 if self.op.mode == constants.INSTANCE_IMPORT:
6403 raise errors.OpPrereqError("Disk adoption not allowed for"
6404 " instance import", errors.ECODE_INVAL)
6406 self.adopt_disks = has_adopt
6408 # instance name verification
6409 if self.op.name_check:
6410 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6411 self.op.instance_name = self.hostname1.name
6412 # used in CheckPrereq for ip ping check
6413 self.check_ip = self.hostname1.ip
6414 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6415 raise errors.OpPrereqError("Remote imports require names to be checked" %
6418 self.check_ip = None
6420 # file storage checks
6421 if (self.op.file_driver and
6422 not self.op.file_driver in constants.FILE_DRIVER):
6423 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6424 self.op.file_driver, errors.ECODE_INVAL)
6426 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6427 raise errors.OpPrereqError("File storage directory path not absolute",
6430 ### Node/iallocator related checks
6431 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6432 raise errors.OpPrereqError("One and only one of iallocator and primary"
6433 " node must be given",
6436 self._cds = _GetClusterDomainSecret()
6438 if self.op.mode == constants.INSTANCE_IMPORT:
6439 # On import force_variant must be True, because if we forced it at
6440 # initial install, our only chance when importing it back is that it
6442 self.op.force_variant = True
6444 if self.op.no_install:
6445 self.LogInfo("No-installation mode has no effect during import")
6447 elif self.op.mode == constants.INSTANCE_CREATE:
6448 if self.op.os_type is None:
6449 raise errors.OpPrereqError("No guest OS specified",
6451 if self.op.disk_template is None:
6452 raise errors.OpPrereqError("No disk template specified",
6455 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6456 # Check handshake to ensure both clusters have the same domain secret
6457 src_handshake = self.op.source_handshake
6458 if not src_handshake:
6459 raise errors.OpPrereqError("Missing source handshake",
6462 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6465 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6468 # Load and check source CA
6469 self.source_x509_ca_pem = self.op.source_x509_ca
6470 if not self.source_x509_ca_pem:
6471 raise errors.OpPrereqError("Missing source X509 CA",
6475 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6477 except OpenSSL.crypto.Error, err:
6478 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6479 (err, ), errors.ECODE_INVAL)
6481 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6482 if errcode is not None:
6483 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6486 self.source_x509_ca = cert
6488 src_instance_name = self.op.source_instance_name
6489 if not src_instance_name:
6490 raise errors.OpPrereqError("Missing source instance name",
6493 self.source_instance_name = \
6494 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
# any other creation mode is rejected
6497 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6498 self.op.mode, errors.ECODE_INVAL)
6500 def ExpandNames(self):
6501 """ExpandNames for CreateInstance.
6503 Figure out the right locks for instance creation.
# NOTE(review): this listing is missing original lines in this method
# (docstring close, the "else:" branches for the iallocator/src_node
# decisions and an ECODE argument) - confirm against the complete source.
6506 self.needed_locks = {}
6508 instance_name = self.op.instance_name
6509 # this is just a preventive check, but someone might still add this
6510 # instance in the meantime, and creation will fail at lock-add time
6511 if instance_name in self.cfg.GetInstanceList():
6512 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6513 instance_name, errors.ECODE_EXISTS)
6515 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
# with an iallocator all nodes must be locked, since placement is not
# known yet; otherwise only the explicitly given nodes
6517 if self.op.iallocator:
6518 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6520 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6521 nodelist = [self.op.pnode]
6522 if self.op.snode is not None:
6523 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6524 nodelist.append(self.op.snode)
6525 self.needed_locks[locking.LEVEL_NODE] = nodelist
6527 # in case of import lock the source node too
6528 if self.op.mode == constants.INSTANCE_IMPORT:
6529 src_node = self.op.src_node
6530 src_path = self.op.src_path
6532 if src_path is None:
6533 self.op.src_path = src_path = self.op.instance_name
6535 if src_node is None:
# unknown source node: lock everything and search all export dirs
6536 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6537 self.op.src_node = None
6538 if os.path.isabs(src_path):
6539 raise errors.OpPrereqError("Importing an instance from an absolute"
6540 " path requires a source node option.",
6543 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6544 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6545 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6546 if not os.path.isabs(src_path):
# relative paths are anchored under the cluster export directory
6547 self.op.src_path = src_path = \
6548 utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success fills in self.op.pnode (and self.op.snode for templates
    needing two nodes) from the iallocator's answer.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     # [several keyword arguments elided in this excerpt]
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     # [disks/nics arguments and closing paren elided]
                     hypervisor=self.op.hypervisor,
    ial.Run(self.op.iallocator)
    # NOTE(review): an "if not ial.success:" guard appears elided here
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
    # [error-code argument elided]
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      # mirrored templates need a secondary node as well
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # [env dict-literal opening elided from this excerpt]
      "ADD_MODE": self.op.mode,
    # [dict-literal closing elided]
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      # [bep/hvp keyword arguments elided]
      hypervisor_name=self.op.hypervisor,
    # [call closing elided]
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
    # [secondary-node list and "return env, nl" elided from this excerpt]
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      # search all locked nodes for an export matching the relative path
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      # [a "found" flag initialization appears elided here]
      for node in exp_list:
        if exp_list[node].fail_msg:
          # skip nodes whose export list could not be retrieved
          # ["continue" elided]
        if src_path in exp_list[node].payload:
          # [flag update elided]
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
          # [second argument, closing paren and "break" elided]
      # ["if not found:" guard elided]
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    # payload is the serialized export config; parse and sanity-check it
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    # ["return export_info" appears elided from this excerpt]
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
    # NOTE(review): a guard around the assignment above appears elided

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
        # [option-name argument and "else:" elided from this excerpt]
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
        # [error-code argument elided]

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        # [disks list initialization elided]
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
      # ["else:" elided]
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
        # [error-code argument elided]

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      # [nics list initialization elided]
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        # [per-nic dict initialization elided]
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          # [dict update / append / "self.op.nics = nics" elided]

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    # ["else:" elided — old-style fallback below]
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    Any parameter whose value equals the cluster default is dropped
    from the opcode, so the instance inherits future default changes.

    """
    # hvparams: compare against the hypervisor/OS-filled defaults
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          # ["del nic[name]" appears elided from this excerpt]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.mode == constants.INSTANCE_IMPORT:
      # import mode: read defaults from the on-disk export first
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    _CheckDiskTemplate(self.op.disk_template)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                                    ",".join(enabled_hvs)),
                                 # [error-code argument elided]

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      # [hvparams argument elided]
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if requested
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    # [self.nics list initialization appears elided]
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      # ["else:" elided]
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        # ["nic_ip = None" elided]
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     # [error-code argument elided]
        nic_ip = self.hostname1.ip
      # ["else:" elided]
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     # [error code and "nic_ip = ip" elided]

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   # [error-code argument elided]

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        # ["try:" elided]
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      # [guard such as "if bridge and link:" elided]
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
        # [error code, link fallback and nicparams initialization elided]
        nicparams[constants.NIC_MODE] = nic_mode_req
      # ["if link:" guard elided]
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    # [self.disks list initialization appears elided]
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      # ["if size is None:" guard elided]
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      # ["try:" and integer conversion elided]
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   # [error-code argument elided]
      new_disk = {"size": size, "mode": mode}
      # [guard for the optional "adopt" key elided]
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   # [error-code argument elided]

      # [disk_images list initialization elided]
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        # ["else:" elided — no dump for this disk]
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # ["try:" elided]
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   # [error-code argument elided]
      if self.op.instance_name == old_name:
        # same name as the exported instance: reuse its MAC addresses
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    # run the allocator if one was requested
    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    # [offline-node guard elided]
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    # [drained-node guard elided]
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                # [disks argument elided]

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   # [error-code argument elided]
      for lv_name in all_lvs:
        # ["try:" elided]
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      # [guard on non-empty delta elided]
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   # [error-code argument elided]
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      # [guard on non-empty online_lvs elided]
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   # [error-code argument elided]
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    # [guard (presumably "if self.op.start:") elided — TODO confirm]
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           # [hypervisor argument elided]

    self.dry_run_result = list(nodenames)
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      # hypervisors needing a network port (e.g. for VNC) get one allocated
      network_port = self.cfg.AllocatePort()
    # [else-branch assigning no port appears elided]

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      # ["else:" elided]
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    # ["else:" elided]
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  # [several positional arguments elided]
                                  self.op.file_driver,
                                  # [remaining arguments elided]

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            # [one keyword argument elided]
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            # [call closing elided]

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      # [rename_to list initialization elided]
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      # NOTE(review): "adoped" in the message below looks like a typo for
      # "adopted" (runtime string — not changed in a doc-only pass)
      result.Raise("Failed to rename adoped LVs")

    feedback_fn("* creating instance disks...")
    # ["try:" elided]
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      # disk creation failed: best-effort cleanup, then re-raise
      self.LogWarning("Device creation failed, reverting...")
      # ["try:" elided]
        _RemoveDisks(self, iobj)
      # ["finally:" (or similar) elided]
        self.cfg.ReleaseDRBDMinors(instance)
        # [re-raise statement elided]

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # keep the source node locked for the data transfer below
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    # ["else:" elided]
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      # [one line elided here]
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    # [else-branch and the "if disk_abort:" guard appear elided]
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               # [rest of message elided]

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        # [transfer-list initialization and related setup elided]
        for idx, image in enumerate(self.src_images):
          # [guard skipping empty images appears elided]
          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             # [last argument elided]
          transfers.append(dt)
        # [import_result assignment opening elided]
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                # [remaining arguments elided]
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        connect_timeout = constants.RIE_CONNECT_TIMEOUT
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        # [failure guard (presumably on result.fail_msg) elided]
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      # [final "else:" branch header elided]
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     # [format arguments elided]

    # [guard (presumably "if self.op.start:") elided — TODO confirm]
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = [("instance_name", _TNonEmptyString)]
  # [REQ_BGL assignment appears elided from this excerpt]

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # the caller runs the returned ssh command line on the master node
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  # [_OP_PARAMS list opening appears elided from this excerpt]
    ("instance_name", _TNonEmptyString),
    ("mode", _TElemOf(constants.REPLACE_MODES)),
    ("disks", _TListOf(_TPositiveInt)),
    # [one or more entries elided]
    ("remote_node", None),
    ("iallocator", None),
    ("early_release", None),
    # [list closing and REQ_BGL assignment elided]

  def CheckArguments(self):
    # argument validation is shared with the tasklet
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
    # [remaining argument(s) and closing paren elided]

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      # allocator decides the new node, so all nodes are locked
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # [final "else:" branch header appears elided]
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # the actual work is delegated to a TLReplaceDisks tasklet
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    # [env dict-literal opening elided]
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
    # [dict-literal closing elided]
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    # [nl list opening elided]
      self.cfg.GetMasterNode(),
      instance.primary_node,
    # [list closing elided]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    # ["return env, nl" appears elided from this excerpt]
7363 class TLReplaceDisks(Tasklet):
7364 """Replaces disks for an instance.
7366 Note: Locking is not within the scope of this class.
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    Stores the constructor parameters and pre-declares the runtime
    attributes that CheckPrereq/_CheckPrereq2 will fill in later.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    # NOTE(review): "self.mode = mode" appears elided from this excerpt
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    # NOTE(review): "self.disks = disks" appears elided from this excerpt
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data (computed during the prereq checks)
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    Called without an instance (no "self"); presumably decorated as a
    @staticmethod on a line elided from this excerpt — TODO confirm.
    Validates the mode / remote_node / iallocator combination and raises
    OpPrereqError on invalid combinations.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        # the two options are mutually exclusive
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    Called without "self"; presumably a @staticmethod whose decorator
    line is elided from this excerpt — TODO confirm.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     # [name keyword argument elided]
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    # [success guard (presumably "if not ial.success:") elided]
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
    # [error-code argument elided]

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 # [first tuple element elided]
                                  len(ial.result), ial.required_nodes),
    # [error-code argument elided]

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name
  def _FindFaultyDisks(self, node_name):
    # Thin wrapper: delegate to the module-level helper for this
    # tasklet's instance on the given node.
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
    # [remaining argument(s), likely including node_name, elided]
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    # replace-disks only makes sense for DRBD8 instances
    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 # [error-code argument elided]

    # the second half may be deferred to Exec (see _CheckPrereq2 docstring)
    if not self.delay_iallocator:
      self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    # ["else:" elided — allocator chooses the new secondary]
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    # ["else:" elided]
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 # [error-code argument elided]

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      # auto/chg modes always operate on all disks
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 # [error-code argument elided]

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   # [error code and next branch header elided]
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      # [fall-through branch (no faulty disks) elided]

    # ["else:" for the non-automatic modes appears elided]
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      # [final "else:" branch header elided]
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     # [format argument elided]

    # If not specified all disks should be replaced
    # [guard (presumably "if not self.disks:") elided]
      self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    # [node_2nd_ip dict initialization elided]
    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
  def Exec(self, feedback_fn):
    """Execute disk replacement.
    This dispatches the disk replacement to the appropriate handler.
    if self.delay_iallocator:
      self._CheckPrereq2()
      feedback_fn("No disks need replacement")
    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))
    # remember whether we had to activate the disks ourselves, so we can
    # shut them down again afterwards (instance was down => disks were down)
    activate_disks = (not self.instance.admin_up)
    # Activate the instance disks if we're replacing them on a down instance
      _StartInstanceDisks(self.lu, self.instance, True)
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
        fn = self._ExecDrbd8DiskOnly
      # NOTE(review): actual replacement is delegated to the chosen handler
      return fn(feedback_fn)
      # Deactivate the instance disks if we're replacing them on a
      _SafeShutdownInstanceDisks(self.lu, self.instance)
  def _CheckVolumeGroup(self, nodes):
    """Verify that the cluster's volume group exists on every given node."""
    self.lu.LogInfo("Checking volume groups")
    vgname = self.cfg.GetVGName()
    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
      raise errors.OpExecError("Can't list volume groups on the nodes")
      # per-node RPC failure aborts the whole check
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
  def _CheckDisksExistence(self, nodes):
    """Check that each disk selected for replacement exists on the nodes."""
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      # only disks listed in self.disks are being replaced
      if idx not in self.disks:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)
        result = self.rpc.call_blockdev_find(node, dev)
        msg = result.fail_msg
        # a missing payload with no error message still means "not found"
        if msg or not result.payload:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Abort if any disk to be replaced is degraded on the given node."""
    for idx, dev in enumerate(self.instance.disks):
      # skip disks not selected for replacement
      if idx not in self.disks:
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))
  def _CreateNewStorage(self, node_name):
    """Create replacement data/meta LVs on node_name for each selected disk.

    Builds a mapping of iv_name -> (disk, old LVs, new LVs) used by the
    later detach/rename/attach steps.
    """
    vgname = self.cfg.GetVGName()
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
      self.cfg.SetDiskID(dev, node_name)
      # one data LV and one metadata LV per disk, with unique names
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      # metadata LV size is fixed at 128 (MiB, presumably — DRBD metadata)
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))
      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)
  def _CheckDevices(self, node_name, iv_names):
    """Verify every replaced DRBD device is findable and not degraded."""
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)
      result = self.rpc.call_blockdev_find(node_name, dev)
      msg = result.fail_msg
      # empty payload without an error message still counts as missing
      if msg or not result.payload:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
  def _RemoveOldStorage(self, node_name, iv_names):
    """Best-effort removal of the replaced (old) LVs on node_name.

    Failures are only warned about, since leftover LVs can be cleaned up
    manually and must not abort an otherwise successful replacement.
    """
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)
        self.cfg.SetDiskID(lv, node_name)
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    # NOTE(review): callers also pass a *list* of node names here; glm.release
    # presumably accepts both a single name and a sequence — confirm.
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.
    The algorithm for replace is quite complicated:
    1. for each disk to be replaced:
      1. create new LVs on the target node with unique names
      1. detach old LVs from the drbd device
      1. rename old LVs to name_replaced.<time_t>
      1. rename new LVs to old LVs
      1. attach the new LVs (with the old names now) to the drbd device
    1. wait for sync across all devices
    1. for each modified disk:
      1. remove old LVs (which have the name name_replaces.<time_t>)
    Failures are not very well handled.
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])
    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)
    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #cfg.Update(instance)
      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)
      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      # maps a disk to its "_replaced-<timestamp>" physical id
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)
      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        # only rename LVs that were actually found on the node
        if not result.fail_msg and result.payload:
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
      # update the in-memory disk objects to reflect the renames
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)
      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
      msg = result.fail_msg
        # attach failed: roll back by removing the freshly created LVs
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
      dev.children = new_lvs
      self.cfg.Update(self.instance, feedback_fn)
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)
    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)
    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.
    The algorithm for replace is quite complicated:
    - for all disks of the instance:
      - create new LVs on the new node with same names
      - shutdown the drbd device on the old secondary
      - disconnect the drbd network on the primary
      - create the drbd device on the new secondary
      - network attach the drbd on the primary, using an artifice:
        the drbd code for Attach() will connect to the network if it
        finds a device which is connected to the good local disks but
    - wait for sync across all devices
    - remove all disks from the old secondary
    Failures are not very well handled.
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])
    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)
    # Step 4: dbrd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    # one new minor on the new node per instance disk
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
    logging.debug("Allocated minors %r", minors)
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      # pick the primary's minor from whichever end of the old id it is on
      if self.instance.primary_node == o_node1:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        # creation failed: give the reserved minors back before bailing out
        self.cfg.ReleaseDRBDMinors(self.instance.name)
    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           "node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                               [self.instance.primary_node]
    msg = result.fail_msg
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))
    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)
    self.cfg.Update(self.instance, feedback_fn)
    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                           self.node_secondary_ip,
                                           self.instance.disks,
    # attach failures are non-fatal: sync can be re-established manually
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)
    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)
    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.
  _OP_REQP = [("node_name", _TNonEmptyString)]
  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckStorageType(self.op.storage_type)
    storage_type = self.op.storage_type
    # only storage types supporting the fix-consistency op can be repaired
    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name),
    except errors.OpPrereqError, err:
      # with ignore_consistency the error is downgraded to a warning
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
  def CheckPrereq(self):
    """Check prerequisites.
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)
  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.
  _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
    ("remote_node", None),
    ("iallocator", None),
  def CheckArguments(self):
    # the two relocation targets are mutually exclusive
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)
  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      # iallocator mode may choose any node, so lock them all
      locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      # explicit target: pair every secondary instance with remote_node
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     (self.op.remote_node, i.name),
        result.append([i.name, self.op.remote_node])
      # otherwise let the iallocator compute the multi-evacuation plan
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.
  HTYPE = constants.HTYPE_INSTANCE
    ("instance_name", _TNonEmptyString),
    ("wait_for_sync", _TBool),
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
  def BuildHooksEnv(self):
    This runs on the master, the primary and all the secondaries.
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
  def CheckPrereq(self):
    """Check prerequisites.
    This checks that the instance is in the cluster.
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)
    self.instance = instance
    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)
    self.disk = instance.FindDisk(self.op.disk)
    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)
  def Exec(self, feedback_fn):
    """Execute disk grow.
    instance = self.instance
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
      raise errors.OpExecError("Cannot activate block device to grow")
    # grow on every node holding the disk; any single failure aborts
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)
    # TODO: Rewrite code to work properly
    # DRBD goes into sync mode for a short amount of time after executing the
    # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
    # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      # a down instance had its disks activated only for the grow
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.
    ("instances", _TListOf(_TNonEmptyString)),
  def ExpandNames(self):
    self.needed_locks = {}
    # read-only query: share all locks
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
      # no instance list given: query all instances
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
  def CheckPrereq(self):
    """Check prerequisites.
    This only checks the optional instance list against the existing names.
    if self.wanted_names is None:
      # "all instances" resolves to whatever we managed to lock
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device
    # static queries (or no node) skip the live RPC
    if self.op.static or not node:
    self.cfg.SetDiskID(dev, node)
    result = self.rpc.call_blockdev_find(node, dev)
    result.Raise("Can't compute disk status for %s" % instance_name)
    status = result.payload
    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
        snode = dev.logical_id[0]
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
    # recurse into children (e.g. the LVs backing a DRBD device)
    dev_children = [self._ComputeDiskStatus(instance, snode, child)
                    for child in dev.children]
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()
    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "down"
      if instance.admin_up:
        config_state = "down"
      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
      result[instance.name] = idict
8407 class LUSetInstanceParams(LogicalUnit):
8408 """Modifies an instances's parameters.
8411 HPATH = "instance-modify"
8412 HTYPE = constants.HTYPE_INSTANCE
8413 _OP_REQP = [("instance_name", _TNonEmptyString)]
8415 ("nics", _EmptyList),
8416 ("disks", _EmptyList),
8417 ("beparams", _EmptyDict),
8418 ("hvparams", _EmptyDict),
8419 ("disk_template", None),
8420 ("remote_node", None),
8422 ("force_variant", False),
  def CheckArguments(self):
    """Syntactic validation of the disk/nic modification lists."""
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
      elif disk_op == constants.DDM_ADD:
        # any other op must be an index into the existing disks
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
        size = disk_dict.get('size', None)
          raise errors.OpPrereqError("Required disk parameter size missing",
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)
    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      # mirrored templates need a secondary node to mirror to
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
      elif nic_op == constants.DDM_ADD:
        # any other op must be an index into the existing NICs
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        # the literal string "none" clears the IP
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None
      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
          # new NICs default to an auto-generated MAC
          nic_dict['mac'] = constants.VALUE_AUTO
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
8545 def ExpandNames(self):
8546 self._ExpandAndLockInstance()
8547 self.needed_locks[locking.LEVEL_NODE] = []
8548 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8550 def DeclareLocks(self, level):
8551 if level == locking.LEVEL_NODE:
8552 self._LockInstancesNodes()
8553 if self.op.disk_template and self.op.remote_node:
8554 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8555 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
  def BuildHooksEnv(self):
    This runs on the master, primary and secondaries.
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    # rebuild the NIC list with the requested overrides applied
    nic_override = dict(self.op.nics)
    for idx, nic in enumerate(self.instance.nics):
      if idx in nic_override:
        this_nic_override = nic_override[idx]
        this_nic_override = {}
      if 'ip' in this_nic_override:
        ip = this_nic_override['ip']
      if 'mac' in this_nic_override:
        mac = this_nic_override['mac']
      if idx in self.nic_pnew:
        nicparams = self.nic_pnew[idx]
        # fall back to the cluster-filled parameters of the existing NIC
        nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
      mode = nicparams[constants.NIC_MODE]
      link = nicparams[constants.NIC_LINK]
      args['nics'].append((ip, mac, mode, link))
    if constants.DDM_ADD in nic_override:
      ip = nic_override[constants.DDM_ADD].get('ip', None)
      mac = nic_override[constants.DDM_ADD]['mac']
      nicparams = self.nic_pnew[constants.DDM_ADD]
      mode = nicparams[constants.NIC_MODE]
      link = nicparams[constants.NIC_LINK]
      args['nics'].append((ip, mac, mode, link))
    elif constants.DDM_REMOVE in nic_override:
      # a removal drops the last NIC from the hook environment
      del args['nics'][-1]
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8609 def CheckPrereq(self):
8610 """Check prerequisites.
8612 This only checks the instance list against the existing names.
# NOTE(review): this excerpt has lines elided throughout (else branches,
# `try:` lines, error-code arguments, continuation lines) — read it as a
# partial view, not as complete code.
8615 # checking the new params on the primary/secondary nodes
8617 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8618 cluster = self.cluster = self.cfg.GetClusterInfo()
8619 assert self.instance is not None, \
8620 "Cannot retrieve locked instance %s" % self.op.instance_name
8621 pnode = instance.primary_node
8622 nodelist = list(instance.all_nodes)
# OS change: verify the target node offers the requested OS (unless
# forced); instance_os is the effective OS used in later checks.
8625 if self.op.os_name and not self.op.force:
8626 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8627 self.op.force_variant)
8628 instance_os = self.op.os_name
8630 instance_os = instance.os
# Disk template conversion: must be a supported conversion pair and, for
# network-mirrored targets, the new secondary must be usable and have
# enough free disk; instance must be down.
8632 if self.op.disk_template:
8633 if instance.disk_template == self.op.disk_template:
8634 raise errors.OpPrereqError("Instance already has disk template %s" %
8635 instance.disk_template, errors.ECODE_INVAL)
8637 if (instance.disk_template,
8638 self.op.disk_template) not in self._DISK_CONVERSIONS:
8639 raise errors.OpPrereqError("Unsupported disk template conversion from"
8640 " %s to %s" % (instance.disk_template,
8641 self.op.disk_template),
8643 if self.op.disk_template in constants.DTS_NET_MIRROR:
8644 _CheckNodeOnline(self, self.op.remote_node)
8645 _CheckNodeNotDrained(self, self.op.remote_node)
8646 disks = [{"size": d.size} for d in instance.disks]
8647 required = _ComputeDiskSize(self.op.disk_template, disks)
8648 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8649 _CheckInstanceDown(self, instance, "cannot change disk template")
8651 # hvparams processing
8652 if self.op.hvparams:
8653 hv_type = instance.hypervisor
8654 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8655 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8656 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8659 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8660 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8661 self.hv_new = hv_new # the new actual values
8662 self.hv_inst = i_hvdict # the new dict (without defaults)
8664 self.hv_new = self.hv_inst = {}
8666 # beparams processing
8667 if self.op.beparams:
8668 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8670 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8671 be_new = cluster.SimpleFillBE(i_bedict)
8672 self.be_new = be_new # the new actual values
8673 self.be_inst = i_bedict # the new dict (without defaults)
8675 self.be_new = self.be_inst = {}
8677 # osparams processing
8678 if self.op.osparams:
8679 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8680 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8681 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8682 self.os_inst = i_osdict # the new dict (without defaults)
8684 self.os_new = self.os_inst = {}
# Memory change: best-effort check (warnings, not failures, on RPC
# errors) that the new memory fits on the primary node and, with
# auto_balance, that secondaries could still host a failover.
8688 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8689 mem_check_list = [pnode]
8690 if be_new[constants.BE_AUTO_BALANCE]:
8691 # either we changed auto_balance to yes or it was from before
8692 mem_check_list.extend(instance.secondary_nodes)
8693 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8694 instance.hypervisor)
8695 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8696 instance.hypervisor)
8697 pninfo = nodeinfo[pnode]
8698 msg = pninfo.fail_msg
8700 # Assume the primary node is unreachable and go ahead
8701 self.warn.append("Can't get info from primary node %s: %s" %
8703 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8704 self.warn.append("Node data from primary node %s doesn't contain"
8705 " free memory information" % pnode)
8706 elif instance_info.fail_msg:
8707 self.warn.append("Can't get instance runtime information: %s" %
8708 instance_info.fail_msg)
8710 if instance_info.payload:
8711 current_mem = int(instance_info.payload['memory'])
8713 # Assume instance not running
8714 # (there is a slight race condition here, but it's not very probable,
8715 # and we have no other way to check)
8717 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8718 pninfo.payload['memory_free'])
8720 raise errors.OpPrereqError("This change will prevent the instance"
8721 " from starting, due to %d MB of memory"
8722 " missing on its primary node" % miss_mem,
8725 if be_new[constants.BE_AUTO_BALANCE]:
8726 for node, nres in nodeinfo.items():
8727 if node not in instance.secondary_nodes:
8731 self.warn.append("Can't get info from secondary node %s: %s" %
8733 elif not isinstance(nres.payload.get('memory_free', None), int):
8734 self.warn.append("Secondary node %s didn't return free"
8735 " memory information" % node)
8736 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8737 self.warn.append("Not enough memory to failover instance to"
8738 " secondary node %s" % node)
# NIC operations: validate index / add / remove, compute and syntax-check
# the new (partial and filled) NIC parameters, and check bridge
# existence / routed-mode IP / MAC uniqueness.
8743 for nic_op, nic_dict in self.op.nics:
8744 if nic_op == constants.DDM_REMOVE:
8745 if not instance.nics:
8746 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8749 if nic_op != constants.DDM_ADD:
8751 if not instance.nics:
8752 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8753 " no NICs" % nic_op,
8755 if nic_op < 0 or nic_op >= len(instance.nics):
8756 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8758 (nic_op, len(instance.nics) - 1),
8760 old_nic_params = instance.nics[nic_op].nicparams
8761 old_nic_ip = instance.nics[nic_op].ip
8766 update_params_dict = dict([(key, nic_dict[key])
8767 for key in constants.NICS_PARAMETERS
8768 if key in nic_dict])
# Legacy compatibility: an explicit 'bridge' maps onto the link param.
8770 if 'bridge' in nic_dict:
8771 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8773 new_nic_params = _GetUpdatedParams(old_nic_params,
8775 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8776 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8777 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8778 self.nic_pinst[nic_op] = new_nic_params
8779 self.nic_pnew[nic_op] = new_filled_nic_params
8780 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8782 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8783 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8784 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8786 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8788 self.warn.append(msg)
8790 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8791 if new_nic_mode == constants.NIC_MODE_ROUTED:
8792 if 'ip' in nic_dict:
8793 nic_ip = nic_dict['ip']
8797 raise errors.OpPrereqError('Cannot set the nic ip to None'
8798 ' on a routed nic', errors.ECODE_INVAL)
8799 if 'mac' in nic_dict:
8800 nic_mac = nic_dict['mac']
8802 raise errors.OpPrereqError('Cannot set the nic mac to None',
8804 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8805 # otherwise generate the mac
8806 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8808 # or validate/reserve the current one
8810 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8811 except errors.ReservationError:
8812 raise errors.OpPrereqError("MAC address %s already in use"
8813 " in cluster" % nic_mac,
8814 errors.ECODE_NOTUNIQUE)
# Disk operations: not valid on diskless instances; cannot remove the
# last disk; cannot exceed the per-instance disk limit.
8817 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8818 raise errors.OpPrereqError("Disk operations not supported for"
8819 " diskless instances",
8821 for disk_op, _ in self.op.disks:
8822 if disk_op == constants.DDM_REMOVE:
8823 if len(instance.disks) == 1:
8824 raise errors.OpPrereqError("Cannot remove the last disk of"
8825 " an instance", errors.ECODE_INVAL)
8826 _CheckInstanceDown(self, instance, "cannot remove disks")
# NOTE(review): BUG — this disk-add limit check counts
# len(instance.nics) but compares it against constants.MAX_DISKS; it
# should almost certainly be len(instance.disks). As written, adding a
# disk is (wrongly) rejected/allowed based on the NIC count.
8828 if (disk_op == constants.DDM_ADD and
8829 len(instance.nics) >= constants.MAX_DISKS):
8830 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8831 " add more" % constants.MAX_DISKS,
8833 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8835 if disk_op < 0 or disk_op >= len(instance.disks):
8836 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8838 (disk_op, len(instance.disks)),
8843 def _ConvertPlainToDrbd(self, feedback_fn):
8844 """Converts an instance from plain to drbd.
# Steps: generate DRBD disk objects mirroring the current plain LVs,
# create the missing meta/data LVs, rename the original LVs into the
# DRBD children's names, create the DRBD devices on both nodes, update
# the config and wait for the initial sync.
# NOTE(review): excerpt is missing lines (e.g. the `if disk_abort:`
# guard before the final raise); also "aditional" in the feedback string
# below is a typo in the runtime message (left untouched here).
8847 feedback_fn("Converting template to drbd")
8848 instance = self.instance
8849 pnode = instance.primary_node
8850 snode = self.op.remote_node
8852 # create a fake disk info for _GenerateDiskTemplate
8853 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8854 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8855 instance.name, pnode, [snode],
8856 disk_info, None, None, 0)
8857 info = _GetInstanceInfoText(instance)
8858 feedback_fn("Creating aditional volumes...")
8859 # first, create the missing data and meta devices
8860 for disk in new_disks:
8861 # unfortunately this is... not too nice
8862 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8864 for child in disk.children:
8865 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8866 # at this stage, all new LVs have been created, we can rename the
8868 feedback_fn("Renaming original volumes...")
8869 rename_list = [(o, n.children[0].logical_id)
8870 for (o, n) in zip(instance.disks, new_disks)]
8871 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8872 result.Raise("Failed to rename original LVs")
8874 feedback_fn("Initializing DRBD devices...")
8875 # all child devices are in place, we can now create the DRBD devices
8876 for disk in new_disks:
8877 for node in [pnode, snode]:
# Only the primary "creates" (formats) the device; the secondary attaches.
8878 f_create = node == pnode
8879 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8881 # at this point, the instance has been modified
8882 instance.disk_template = constants.DT_DRBD8
8883 instance.disks = new_disks
8884 self.cfg.Update(instance, feedback_fn)
8886 # disks are created, waiting for sync
8887 disk_abort = not _WaitForSync(self, instance)
8889 raise errors.OpExecError("There are some degraded disks for"
8890 " this instance, please cleanup manually")
8892 def _ConvertDrbdToPlain(self, feedback_fn):
8893 """Converts an instance from drbd to plain.
# Keeps each DRBD disk's first child (the data LV on the primary) as the
# new plain disk, updates the config, then best-effort removes the old
# volumes on the secondary and the now-unneeded metadata on the primary.
# NOTE(review): excerpt is missing the `if msg:` guard lines before the
# two LogWarning calls below.
8896 instance = self.instance
# Only single-secondary DRBD8 instances can be converted.
8897 assert len(instance.secondary_nodes) == 1
8898 pnode = instance.primary_node
8899 snode = instance.secondary_nodes[0]
8900 feedback_fn("Converting template to plain")
8902 old_disks = instance.disks
8903 new_disks = [d.children[0] for d in old_disks]
8905 # copy over size and mode
8906 for parent, child in zip(old_disks, new_disks):
8907 child.size = parent.size
8908 child.mode = parent.mode
8910 # update instance structure
8911 instance.disks = new_disks
8912 instance.disk_template = constants.DT_PLAIN
8913 self.cfg.Update(instance, feedback_fn)
8915 feedback_fn("Removing volumes on the secondary node...")
8916 for disk in old_disks:
8917 self.cfg.SetDiskID(disk, snode)
8918 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8920 self.LogWarning("Could not remove block device %s on node %s,"
8921 " continuing anyway: %s", disk.iv_name, snode, msg)
8923 feedback_fn("Removing unneeded volumes on the primary node...")
8924 for idx, disk in enumerate(old_disks):
# children[1] is the DRBD metadata LV, no longer needed on plain.
8925 meta = disk.children[1]
8926 self.cfg.SetDiskID(meta, pnode)
8927 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8929 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8930 " continuing anyway: %s", idx, pnode, msg)
8933 def Exec(self, feedback_fn):
8934 """Modifies an instance.
8936 All parameters take effect only at the next restart of the instance.
# Applies the changes validated in CheckPrereq: disk add/remove/modify,
# optional disk template conversion, NIC add/remove/modify, then
# hv/be/os parameter updates, and finally writes the config. Returns
# (via the elided final lines) the accumulated `result` change list.
# NOTE(review): excerpt is missing lines (the `result = []`
# initialisation, `try:` lines, `if` guards before some warnings).
8939 # Process here the warnings from CheckPrereq, as we don't have a
8940 # feedback_fn there.
8941 for warn in self.warn:
8942 feedback_fn("WARNING: %s" % warn)
8945 instance = self.instance
8947 for disk_op, disk_dict in self.op.disks:
8948 if disk_op == constants.DDM_REMOVE:
8949 # remove the last disk
8950 device = instance.disks.pop()
8951 device_idx = len(instance.disks)
8952 for node, disk in device.ComputeNodeTree(instance.primary_node):
8953 self.cfg.SetDiskID(disk, node)
8954 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8956 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8957 " continuing anyway", device_idx, node, msg)
8958 result.append(("disk/%d" % device_idx, "remove"))
8959 elif disk_op == constants.DDM_ADD:
# For file-based instances, new disks go next to the existing ones.
8961 if instance.disk_template == constants.DT_FILE:
8962 file_driver, file_path = instance.disks[0].logical_id
8963 file_path = os.path.dirname(file_path)
8965 file_driver = file_path = None
8966 disk_idx_base = len(instance.disks)
8967 new_disk = _GenerateDiskTemplate(self,
8968 instance.disk_template,
8969 instance.name, instance.primary_node,
8970 instance.secondary_nodes,
8975 instance.disks.append(new_disk)
8976 info = _GetInstanceInfoText(instance)
8978 logging.info("Creating volume %s for instance %s",
8979 new_disk.iv_name, instance.name)
8980 # Note: this needs to be kept in sync with _CreateDisks
8982 for node in instance.all_nodes:
8983 f_create = node == instance.primary_node
8985 _CreateBlockDev(self, node, instance, new_disk,
8986 f_create, info, f_create)
8987 except errors.OpExecError, err:
8988 self.LogWarning("Failed to create volume %s (%s) on"
8990 new_disk.iv_name, new_disk, node, err)
8991 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8992 (new_disk.size, new_disk.mode)))
8994 # change a given disk
8995 instance.disks[disk_op].mode = disk_dict['mode']
8996 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
# NOTE(review): "shutdow" below is a typo in the runtime error message
# ("shutdown"); left untouched here since this edit changes no code.
8998 if self.op.disk_template:
8999 r_shut = _ShutdownInstanceDisks(self, instance)
9001 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9002 " proceed with disk template conversion")
9003 mode = (instance.disk_template, self.op.disk_template)
9005 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9007 self.cfg.ReleaseDRBDMinors(instance.name)
9009 result.append(("disk_template", self.op.disk_template))
9012 for nic_op, nic_dict in self.op.nics:
9013 if nic_op == constants.DDM_REMOVE:
9014 # remove the last nic
9015 del instance.nics[-1]
9016 result.append(("nic.%d" % len(instance.nics), "remove"))
9017 elif nic_op == constants.DDM_ADD:
9018 # mac and bridge should be set, by now
9019 mac = nic_dict['mac']
9020 ip = nic_dict.get('ip', None)
9021 nicparams = self.nic_pinst[constants.DDM_ADD]
9022 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9023 instance.nics.append(new_nic)
9024 result.append(("nic.%d" % (len(instance.nics) - 1),
9025 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9026 (new_nic.mac, new_nic.ip,
9027 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9028 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
# Modify an existing NIC in place (mac/ip and/or nicparams).
9031 for key in 'mac', 'ip':
9033 setattr(instance.nics[nic_op], key, nic_dict[key])
9034 if nic_op in self.nic_pinst:
9035 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9036 for key, val in nic_dict.iteritems():
9037 result.append(("nic.%s/%d" % (key, nic_op), val))
9040 if self.op.hvparams:
9041 instance.hvparams = self.hv_inst
9042 for key, val in self.op.hvparams.iteritems():
9043 result.append(("hv/%s" % key, val))
9046 if self.op.beparams:
9047 instance.beparams = self.be_inst
9048 for key, val in self.op.beparams.iteritems():
9049 result.append(("be/%s" % key, val))
9053 instance.os = self.op.os_name
9056 if self.op.osparams:
9057 instance.osparams = self.os_inst
9058 for key, val in self.op.osparams.iteritems():
9059 result.append(("os/%s" % key, val))
9061 self.cfg.Update(instance, feedback_fn)
# Dispatch table mapping (from_template, to_template) to the conversion
# method; consulted by CheckPrereq and invoked in Exec.
# NOTE(review): the closing brace of this dict is elided in this excerpt.
9065 _DISK_CONVERSIONS = {
9066 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9067 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9071 class LUQueryExports(NoHooksLU):
9072 """Query the exports list
# Shared node locks only; with an empty op.nodes list, all nodes are
# queried. NOTE(review): excerpt elides the `result = {}` init and the
# final `return result`.
9075 _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
9078 def ExpandNames(self):
9079 self.needed_locks = {}
9080 self.share_locks[locking.LEVEL_NODE] = 1
9081 if not self.op.nodes:
9082 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9084 self.needed_locks[locking.LEVEL_NODE] = \
9085 _GetWantedNodes(self, self.op.nodes)
9087 def Exec(self, feedback_fn):
9088 """Compute the list of all the exported system images.
9091 @return: a dictionary with the structure node->(export-list)
9092 where export-list is a list of the instances exported on
# Nodes whose export-list RPC failed are reported with value False.
9096 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9097 rpcresult = self.rpc.call_export_list(self.nodes)
9099 for node in rpcresult:
9100 if rpcresult[node].fail_msg:
9101 result[node] = False
9103 result[node] = rpcresult[node].payload
9108 class LUPrepareExport(NoHooksLU):
9109 """Prepares an instance for an export and returns useful information.
# For remote exports this creates a temporary X509 certificate on the
# primary node and returns it together with a handshake signed with the
# cluster domain secret. NOTE(review): excerpt elides the `_OP_REQP`
# list delimiters and the tail of the returned dictionary.
9113 ("instance_name", _TNonEmptyString),
9114 ("mode", _TElemOf(constants.EXPORT_MODES)),
9118 def ExpandNames(self):
9119 self._ExpandAndLockInstance()
9121 def CheckPrereq(self):
9122 """Check prerequisites.
9125 instance_name = self.op.instance_name
9127 self.instance = self.cfg.GetInstanceInfo(instance_name)
9128 assert self.instance is not None, \
9129 "Cannot retrieve locked instance %s" % self.op.instance_name
9130 _CheckNodeOnline(self, self.instance.primary_node)
9132 self._cds = _GetClusterDomainSecret()
9134 def Exec(self, feedback_fn):
9135 """Prepares an instance for an export.
9138 instance = self.instance
9140 if self.op.mode == constants.EXPORT_MODE_REMOTE:
# Fresh salt per operation; used to HMAC the generated key name below.
9141 salt = utils.GenerateSecret(8)
9143 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9144 result = self.rpc.call_x509_cert_create(instance.primary_node,
9145 constants.RIE_CERT_VALIDITY)
9146 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9148 (name, cert_pem) = result.payload
9150 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9154 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9155 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9157 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9163 class LUExportInstance(LogicalUnit):
9164 """Export an instance to an image in the cluster.
# Supports local exports (to another cluster node) and remote exports
# (encrypted, to another cluster, authenticated via the cluster domain
# secret and X509 certificates). NOTE(review): this excerpt elides many
# lines (list/dict delimiters, `if msg:` guards, `finally:` blocks,
# several continuation lines) — treat it as a partial view.
9167 HPATH = "instance-export"
9168 HTYPE = constants.HTYPE_INSTANCE
9170 ("instance_name", _TNonEmptyString),
9171 ("target_node", _TNonEmptyString),
9172 ("shutdown", _TBool),
9173 ("mode", _TElemOf(constants.EXPORT_MODES)),
9176 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
9177 ("remove_instance", False),
9178 ("ignore_remove_failures", False),
9179 ("mode", constants.EXPORT_MODE_LOCAL),
9180 ("x509_key_name", None),
9181 ("destination_x509_ca", None),
9185 def CheckArguments(self):
9186 """Check the arguments.
# remove_instance requires shutdown; remote mode requires the X509 key
# name and destination CA to be supplied.
9189 self.x509_key_name = self.op.x509_key_name
9190 self.dest_x509_ca_pem = self.op.destination_x509_ca
9192 if self.op.remove_instance and not self.op.shutdown:
9193 raise errors.OpPrereqError("Can not remove instance without shutting it"
9196 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9197 if not self.x509_key_name:
9198 raise errors.OpPrereqError("Missing X509 key name for encryption",
9201 if not self.dest_x509_ca_pem:
9202 raise errors.OpPrereqError("Missing destination X509 CA",
9205 def ExpandNames(self):
9206 self._ExpandAndLockInstance()
9208 # Lock all nodes for local exports
9209 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9210 # FIXME: lock only instance primary and destination node
9212 # Sad but true, for now we have do lock all nodes, as we don't know where
9213 # the previous export might be, and in this LU we search for it and
9214 # remove it from its current node. In the future we could fix this by:
9215 # - making a tasklet to search (share-lock all), then create the
9216 # new one, then one to remove, after
9217 # - removing the removal operation altogether
9218 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9220 def DeclareLocks(self, level):
9221 """Last minute lock declaration."""
9222 # All nodes are locked anyway, so nothing to do here.
9224 def BuildHooksEnv(self):
9227 This will run on the master, primary node and target node.
9231 "EXPORT_MODE": self.op.mode,
9232 "EXPORT_NODE": self.op.target_node,
9233 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9234 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9235 # TODO: Generic function for boolean env variables
9236 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9239 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9241 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9243 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9244 nl.append(self.op.target_node)
9248 def CheckPrereq(self):
9249 """Check prerequisites.
9251 This checks that the instance and node names are valid.
9254 instance_name = self.op.instance_name
9256 self.instance = self.cfg.GetInstanceInfo(instance_name)
9257 assert self.instance is not None, \
9258 "Cannot retrieve locked instance %s" % self.op.instance_name
9259 _CheckNodeOnline(self, self.instance.primary_node)
9261 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9262 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9263 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9264 assert self.dst_node is not None
9266 _CheckNodeOnline(self, self.dst_node.name)
9267 _CheckNodeNotDrained(self, self.dst_node.name)
9270 self.dest_disk_info = None
9271 self.dest_x509_ca = None
9273 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9274 self.dst_node = None
# In remote mode, op.target_node carries per-disk destination info
# (host, port, magic tuples) rather than a node name.
9276 if len(self.op.target_node) != len(self.instance.disks):
9277 raise errors.OpPrereqError(("Received destination information for %s"
9278 " disks, but instance %s has %s disks") %
9279 (len(self.op.target_node), instance_name,
9280 len(self.instance.disks)),
9283 cds = _GetClusterDomainSecret()
9285 # Check X509 key name
9287 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9288 except (TypeError, ValueError), err:
9289 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9291 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9292 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9295 # Load and verify CA
9297 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9298 except OpenSSL.crypto.Error, err:
9299 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9300 (err, ), errors.ECODE_INVAL)
9302 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9303 if errcode is not None:
9304 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9305 (msg, ), errors.ECODE_INVAL)
9307 self.dest_x509_ca = cert
9309 # Verify target information
9311 for idx, disk_data in enumerate(self.op.target_node):
9313 (host, port, magic) = \
9314 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9315 except errors.GenericError, err:
9316 raise errors.OpPrereqError("Target info for disk %s: %s" %
9317 (idx, err), errors.ECODE_INVAL)
9319 disk_info.append((host, port, magic))
9321 assert len(disk_info) == len(self.op.target_node)
9322 self.dest_disk_info = disk_info
9325 raise errors.ProgrammerError("Unhandled export mode %r" %
9328 # instance disk type verification
9329 # TODO: Implement export support for file-based disks
9330 for disk in self.instance.disks:
9331 if disk.dev_type == constants.LD_FILE:
9332 raise errors.OpPrereqError("Export not supported for instances with"
9333 " file-based disks", errors.ECODE_INVAL)
9335 def _CleanupExports(self, feedback_fn):
9336 """Removes exports of current instance from all other nodes.
9338 If an instance in a cluster with nodes A..D was exported to node C, its
9339 exports will be removed from the nodes A, B and D.
9342 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9344 nodelist = self.cfg.GetNodeList()
9345 nodelist.remove(self.dst_node.name)
9347 # on one-node clusters nodelist will be empty after the removal
9348 # if we proceed the backup would be removed because OpQueryExports
9349 # substitutes an empty list with the full cluster node list.
9350 iname = self.instance.name
9352 feedback_fn("Removing old exports for instance %s" % iname)
9353 exportlist = self.rpc.call_export_list(nodelist)
9354 for node in exportlist:
9355 if exportlist[node].fail_msg:
9357 if iname in exportlist[node].payload:
9358 msg = self.rpc.call_export_remove(node, iname).fail_msg
9360 self.LogWarning("Could not remove older export for instance %s"
9361 " on node %s: %s", iname, node, msg)
9363 def Exec(self, feedback_fn):
9364 """Export an instance to an image in the cluster.
9367 assert self.op.mode in constants.EXPORT_MODES
9369 instance = self.instance
9370 src_node = instance.primary_node
9372 if self.op.shutdown:
9373 # shutdown the instance, but not the disks
9374 feedback_fn("Shutting down instance %s" % instance.name)
9375 result = self.rpc.call_instance_shutdown(src_node, instance,
9376 self.op.shutdown_timeout)
9377 # TODO: Maybe ignore failures if ignore_remove_failures is set
9378 result.Raise("Could not shutdown instance %s on"
9379 " node %s" % (instance.name, src_node))
9381 # set the disks ID correctly since call_instance_start needs the
9382 # correct drbd minor to create the symlinks
9383 for disk in instance.disks:
9384 self.cfg.SetDiskID(disk, src_node)
9386 activate_disks = (not instance.admin_up)
9389 # Activate the instance disks if we're exporting a stopped instance
9390 feedback_fn("Activating disks for %s" % instance.name)
9391 _StartInstanceDisks(self, instance, None)
9394 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9397 helper.CreateSnapshots()
# Restart the instance once snapshots exist, unless it is about to be
# removed anyway.
9399 if (self.op.shutdown and instance.admin_up and
9400 not self.op.remove_instance):
9401 assert not activate_disks
9402 feedback_fn("Starting instance %s" % instance.name)
9403 result = self.rpc.call_instance_start(src_node, instance, None, None)
9404 msg = result.fail_msg
9406 feedback_fn("Failed to start instance: %s" % msg)
9407 _ShutdownInstanceDisks(self, instance)
9408 raise errors.OpExecError("Could not start instance: %s" % msg)
9410 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9411 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9412 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9413 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9414 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9416 (key_name, _, _) = self.x509_key_name
9419 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9422 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9423 key_name, dest_ca_pem,
9428 # Check for backwards compatibility
9429 assert len(dresults) == len(instance.disks)
9430 assert compat.all(isinstance(i, bool) for i in dresults), \
9431 "Not all results are boolean: %r" % dresults
9435 feedback_fn("Deactivating disks for %s" % instance.name)
9436 _ShutdownInstanceDisks(self, instance)
9438 # Remove instance if requested
9439 if self.op.remove_instance:
9440 if not (compat.all(dresults) and fin_resu):
9441 feedback_fn("Not removing instance %s as parts of the export failed" %
9444 feedback_fn("Removing instance %s" % instance.name)
9445 _RemoveInstance(self, feedback_fn, instance,
9446 self.op.ignore_remove_failures)
9448 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9449 self._CleanupExports(feedback_fn)
9451 return fin_resu, dresults
9454 class LURemoveExport(NoHooksLU):
9455 """Remove exports related to the named instance.
# Best-effort removal of a named export from every node; works even if
# the instance itself no longer exists. NOTE(review): excerpt elides
# the `fqdn_warn`/`found` flag initialisations, the `if msg:` guards
# and `continue`/flag-update lines in the loop below.
9458 _OP_REQP = [("instance_name", _TNonEmptyString)]
9461 def ExpandNames(self):
9462 self.needed_locks = {}
9463 # We need all nodes to be locked in order for RemoveExport to work, but we
9464 # don't need to lock the instance itself, as nothing will happen to it (and
9465 # we can remove exports also for a removed instance)
9466 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9468 def Exec(self, feedback_fn):
9469 """Remove any export.
9472 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9473 # If the instance was not found we'll try with the name that was passed in.
9474 # This will only work if it was an FQDN, though.
9476 if not instance_name:
9478 instance_name = self.op.instance_name
9480 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9481 exportlist = self.rpc.call_export_list(locked_nodes)
9483 for node in exportlist:
9484 msg = exportlist[node].fail_msg
9486 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9488 if instance_name in exportlist[node].payload:
9490 result = self.rpc.call_export_remove(node, instance_name)
9491 msg = result.fail_msg
9493 logging.error("Could not remove export for instance %s"
9494 " on node %s: %s", instance_name, node, msg)
9496 if fqdn_warn and not found:
9497 feedback_fn("Export not found. If trying to remove an export belonging"
9498 " to a deleted instance please use its Fully Qualified"
9502 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
# Abstract base for the tag LUs: expands and locks the tagged object
# (node or instance) and resolves it into self.target in CheckPrereq.
# NOTE(review): the docstring delimiters are elided in this excerpt.
9505 This is an abstract class which is the parent of all the other tags LUs.
9509 def ExpandNames(self):
9510 self.needed_locks = {}
9511 if self.op.kind == constants.TAG_NODE:
9512 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9513 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9514 elif self.op.kind == constants.TAG_INSTANCE:
9515 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9516 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9518 def CheckPrereq(self):
9519 """Check prerequisites.
# Cluster tags need no expansion/locking beyond the BGL.
9522 if self.op.kind == constants.TAG_CLUSTER:
9523 self.target = self.cfg.GetClusterInfo()
9524 elif self.op.kind == constants.TAG_NODE:
9525 self.target = self.cfg.GetNodeInfo(self.op.name)
9526 elif self.op.kind == constants.TAG_INSTANCE:
9527 self.target = self.cfg.GetInstanceInfo(self.op.name)
9529 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9530 str(self.op.kind), errors.ECODE_INVAL)
9533 class LUGetTags(TagsLU):
9534 """Returns the tags of a given object.
# Read-only: returns the tag set of self.target (resolved by
# TagsLU.CheckPrereq) as a list. NOTE(review): the `_OP_REQP = [`
# opener/closer lines are elided in this excerpt.
9538 ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9539 ("name", _TNonEmptyString),
9543 def Exec(self, feedback_fn):
9544 """Returns the tag list.
9547 return list(self.target.GetTags())
9550 class LUSearchTags(NoHooksLU):
9551 """Searches the tags for a given pattern.
# Matches the compiled regex against every tag of the cluster, all
# instances and all nodes, returning (path, tag) pairs.
# NOTE(review): excerpt elides the `try:` line around re.compile and the
# `results = []` / `return results` lines.
9554 _OP_REQP = [("pattern", _TNonEmptyString)]
9557 def ExpandNames(self):
9558 self.needed_locks = {}
9560 def CheckPrereq(self):
9561 """Check prerequisites.
9563 This checks the pattern passed for validity by compiling it.
9567 self.re = re.compile(self.op.pattern)
9568 except re.error, err:
9569 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9570 (self.op.pattern, err), errors.ECODE_INVAL)
9572 def Exec(self, feedback_fn):
9573 """Returns the tag list.
9577 tgts = [("/cluster", cfg.GetClusterInfo())]
9578 ilist = cfg.GetAllInstancesInfo().values()
9579 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9580 nlist = cfg.GetAllNodesInfo().values()
9581 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9583 for path, target in tgts:
9584 for tag in target.GetTags():
9585 if self.re.search(tag):
9586 results.append((path, tag))
9590 class LUAddTags(TagsLU):
9591 """Sets a tag on a given object.
# Validates each tag in CheckPrereq, then adds all tags to the target
# and persists the change. NOTE(review): excerpt elides the `_OP_REQP`
# delimiters and the `try:` line in Exec.
9595 ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9596 ("name", _TNonEmptyString),
9597 ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9601 def CheckPrereq(self):
9602 """Check prerequisites.
9604 This checks the type and length of the tag name and value.
9607 TagsLU.CheckPrereq(self)
9608 for tag in self.op.tags:
9609 objects.TaggableObject.ValidateTag(tag)
9611 def Exec(self, feedback_fn):
9616 for tag in self.op.tags:
9617 self.target.AddTag(tag)
9618 except errors.TagError, err:
9619 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9620 self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = [
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _TNonEmptyString),
    ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    # resolve self.target (cluster/node/instance) first
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    # all requested tags must currently exist on the target
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      # sort for a deterministic error message
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    # persist the modified target object back into the configuration
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = [
    ("duration", _TFloat),
    ("on_master", _TBool),
    ("on_nodes", _TListOf(_TNonEmptyString)),
    ("repeat", _TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    Sleeps locally on the master (via utils.TestDelay) and/or remotely
    on the requested nodes (via the test_delay RPC), raising
    OpExecError on any failure.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      # repeat == 0 still means one (silent) delay
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  # keyword arguments required for each request mode; __init__ checks
  # the provided kwargs against the keyset matching its mode
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    """Initialize an allocator request.

    @param cfg: cluster configuration accessor
    @param rpc: RPC runner for querying live node/instance state
    @param mode: one of the constants.IALLOCATOR_MODE_* values
    @param kwargs: the mode-specific input parameters; must match
        exactly the keyset for the chosen mode (see *_KEYS above)

    """
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    # accept only the parameters valid for this mode...
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    # ...and require that all of them were given
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    # pair each instance with its filled-in backend parameters
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    # which hypervisor's view of node resources to query
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    node_results = {}
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      # dynamic (live) data only makes sense for usable nodes
      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            # reserve the memory an instance is entitled to but is not
            # currently using, so the allocator sees the true free amount
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        # legacy field for bridged NICs, kept for older allocator scripts
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    # mirrored templates need a secondary node as well
    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    # relocation only makes sense for network-mirrored (DRBD) instances
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    @param fn: the mode-specific request builder (_AddNewInstance,
        _AddRelocateInstance or _AddEvacuateNodes)

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    # serialized form handed to the external allocator script
    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    @param name: name of the allocator script to run
    @param validate: whether to parse/validate the script output
    @param call_fn: RPC override, mainly for testing

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatiblity in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
10044 class LUTestAllocator(NoHooksLU):
10045 """Run allocator tests.
10047 This LU runs the allocator tests
10051 ("direction", _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10052 ("mode", _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10053 ("name", _TNonEmptyString),
10054 ("nics", _TOr(_TNone, _TListOf(
10055 _TDictOf(_TElemOf(["mac", "ip", "bridge"]), _TNonEmptyString)))),
10056 ("disks", _TOr(_TNone, _TList)),
10059 ("hypervisor", None),
10060 ("allocator", None),
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      # allocation test: all instance-description fields must be present
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # the test instance must not already exist
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # each disk must be a dict with an int size and a 'r'/'w' mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        # default to the cluster-wide hypervisor
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # relocate away from the instance's current secondary node(s)
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # "out" direction runs a real allocator script, so its name is needed
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
10117 def Exec(self, feedback_fn):
10118 """Run the allocator test.
10121 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10122 ial = IAllocator(self.cfg, self.rpc,
10125 mem_size=self.op.mem_size,
10126 disks=self.op.disks,
10127 disk_template=self.op.disk_template,
10131 vcpus=self.op.vcpus,
10132 hypervisor=self.op.hypervisor,
10134 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10135 ial = IAllocator(self.cfg, self.rpc,
10138 relocate_from=list(self.relocate_from),
10140 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10141 ial = IAllocator(self.cfg, self.rpc,
10143 evac_nodes=self.op.evac_nodes)
10145 raise errors.ProgrammerError("Uncatched mode %s in"
10146 " LUTestAllocator.Exec", self.op.mode)
10148 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10149 result = ial.in_text
10151 ial.Run(self.op.allocator, validate=False)
10152 result = ial.out_text