4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
31 import os
32 import os.path
33 import time
34 import re
35 import platform
36 import logging
37 import copy
38 import OpenSSL
40 from ganeti import ssh
41 from ganeti import utils
42 from ganeti import errors
43 from ganeti import hypervisor
44 from ganeti import locking
45 from ganeti import constants
46 from ganeti import objects
47 from ganeti import serializer
48 from ganeti import ssconf
49 from ganeti import uidpool
50 from ganeti import compat
51 from ganeti import masterd
53 import ganeti.masterd.instance # pylint: disable-msg=W0611
56 # Modifiable default values; need to define these here before the
57 # actual LUs
60 """Returns an empty list.
67 """Returns an empty dict.
75 """Checks if the given value is not None.
78 return val is not None
82 """Checks if the given value is None.
89 """Checks if the given value is a boolean.
92 return isinstance(val, bool)
96 """Checks if the given value is an integer.
99 return isinstance(val, int)
103 """Checks if the given value is a float.
106 return isinstance(val, float)
110 """Checks if the given value is a string.
113 return isinstance(val, basestring)
117 """Checks if a given value evaluates to a boolean True value.
123 def _TElemOf(target_list):
124 """Builds a function that checks if a given value is a member of a list.
127 return lambda val: val in target_list
132 """Checks if the given value is a list.
135 return isinstance(val, list)
139 """Checks if the given value is a dictionary.
142 return isinstance(val, dict)
147 """Combine multiple functions using an AND operation.
151 return compat.all(t(val) for t in args)
156 """Combine multiple functions using an AND operation.
160 return compat.any(t(val) for t in args)
167 _TNonEmptyString = _TAnd(_TString, _TTrue)
171 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
174 def _TListOf(my_type):
175 """Checks if a given value is a list with all elements of the same type.
179 lambda lst: compat.all(my_type(v) for v in lst))
182 def _TDictOf(key_type, val_type):
183 """Checks a dict type for the type of its key/values.
187 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
188 and compat.all(val_type(v)
189 for v in my_dict.values())))
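# Illustrative sketch (editor's addition, not part of the original source):
# these small validators are meant to be composed into _OP_REQP entries.
# LUVerifyCluster further below declares, for example,
#
#   _OP_REQP = [
#     ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
#     ("verbose", _TBool),
#   ]
#
# and a dict-valued parameter could be described as something like
# _TDictOf(_TNonEmptyString, _TListOf(_TInt)).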
193 class LogicalUnit(object):
194 """Logical Unit base class.
196 Subclasses must follow these rules:
197 - implement ExpandNames
198 - implement CheckPrereq (except when tasklets are used)
199 - implement Exec (except when tasklets are used)
200 - implement BuildHooksEnv
201 - redefine HPATH and HTYPE
202 - optionally redefine their run requirements:
203 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
205 Note that all commands require root permissions.
207 @ivar dry_run_result: the value (if any) that will be returned to the caller
208 in dry-run mode (signalled by opcode dry_run parameter)
209 @cvar _OP_DEFS: a list of opcode attributes and the default values
210 they should get if not already existing
219 def __init__(self, processor, op, context, rpc):
220 """Constructor for LogicalUnit.
222 This needs to be overridden in derived classes in order to check op
223 validity.
226 self.proc = processor
227 self.op = op
228 self.cfg = context.cfg
229 self.context = context
230 self.rpc = rpc
231 # Dicts used to declare locking needs to mcpu
232 self.needed_locks = None
233 self.acquired_locks = {}
234 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
236 self.remove_locks = {}
237 # Used to force good behavior when calling helper functions
238 self.recalculate_locks = {}
241 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
242 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
243 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
244 # support for dry-run
245 self.dry_run_result = None
246 # support for generic debug attribute
247 if (not hasattr(self.op, "debug_level") or
248 not isinstance(self.op.debug_level, int)):
249 self.op.debug_level = 0
251 # Tasklets
252 self.tasklets = None
254 for aname, aval in self._OP_DEFS:
255 if not hasattr(self.op, aname):
256 if callable(aval):
257 dval = aval()
258 else:
259 dval = aval
260 setattr(self.op, aname, dval)
262 for attr_name, test in self._OP_REQP:
263 if not hasattr(op, attr_name):
264 raise errors.OpPrereqError("Required parameter '%s' missing" %
265 attr_name, errors.ECODE_INVAL)
266 attr_val = getattr(op, attr_name, None)
267 if not callable(test):
268 raise errors.ProgrammerError("Validation for parameter '%s' failed,"
269 " given type is not a proper type (%s)" %
271 if not test(attr_val):
272 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
273 self.op.OP_ID, attr_name, type(attr_val), attr_val)
274 raise errors.OpPrereqError("Parameter '%s' has invalid type" %
275 attr_name, errors.ECODE_INVAL)
277 self.CheckArguments()
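# Illustrative sketch (editor's addition): a subclass typically declares the
# parameters checked by the loop above as class attributes; "LUExample" and
# its parameters are hypothetical.
#
#   class LUExample(LogicalUnit):
#     _OP_REQP = [("verbose", _TBool)]
#     _OP_DEFS = [("verbose", False)]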
280 """Returns the SshRunner object
284 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
287 ssh = property(fget=__GetSSH)
289 def CheckArguments(self):
290 """Check syntactic validity for the opcode arguments.
292 This method is for doing a simple syntactic check and ensuring
293 validity of opcode parameters, without any cluster-related
294 checks. While the same can be accomplished in ExpandNames and/or
295 CheckPrereq, doing these separately is better because:
297 - ExpandNames is left as purely a lock-related function
298 - CheckPrereq is run after we have acquired locks (and possible
301 The function is allowed to change the self.op attribute so that
302 later methods can no longer worry about missing parameters.
307 def ExpandNames(self):
308 """Expand names for this LU.
310 This method is called before starting to execute the opcode, and it should
311 update all the parameters of the opcode to their canonical form (e.g. a
312 short node name must be fully expanded after this method has successfully
313 completed). This way locking, hooks, logging, etc. can work correctly.
315 LUs which implement this method must also populate the self.needed_locks
316 member, as a dict with lock levels as keys, and a list of needed lock names
319 - use an empty dict if you don't need any lock
320 - if you don't need any lock at a particular level omit that level
321 - don't put anything for the BGL level
322 - if you want all locks at a level use locking.ALL_SET as a value
324 If you need to share locks (rather than acquire them exclusively) at one
325 level you can modify self.share_locks, setting a true value (usually 1) for
326 that level. By default locks are not shared.
328 This function can also define a list of tasklets, which then will be
329 executed in order instead of the usual LU-level CheckPrereq and Exec
330 functions, if those are not defined by the LU.
334 # Acquire all nodes and one instance
335 self.needed_locks = {
336 locking.LEVEL_NODE: locking.ALL_SET,
337 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
339 # Acquire just two nodes
340 self.needed_locks = {
341 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
344 self.needed_locks = {} # No, you can't leave it to the default value None
347 # The implementation of this method is mandatory only if the new LU is
348 # concurrent, so that old LUs don't need to be changed all at the same
349 # time.
351 self.needed_locks = {} # Exclusive LUs don't need locks.
353 raise NotImplementedError
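# Illustrative sketch (editor's addition): a minimal ExpandNames for a
# hypothetical read-only LU that wants all node locks in shared mode.
#
#   def ExpandNames(self):
#     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#     self.share_locks[locking.LEVEL_NODE] = 1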
355 def DeclareLocks(self, level):
356 """Declare LU locking needs for a level
358 While most LUs can just declare their locking needs at ExpandNames time,
359 sometimes there's the need to calculate some locks after having acquired
360 the ones before. This function is called just before acquiring locks at a
361 particular level, but after acquiring the ones at lower levels, and permits
362 such calculations. It can be used to modify self.needed_locks, and by
363 default it does nothing.
365 This function is only called if you have something already set in
366 self.needed_locks for the level.
368 @param level: Locking level which is going to be locked
369 @type level: member of ganeti.locking.LEVELS
373 def CheckPrereq(self):
374 """Check prerequisites for this LU.
376 This method should check that the prerequisites for the execution
377 of this LU are fulfilled. It can do internode communication, but
378 it should be idempotent - no cluster or system changes are
379 allowed.
381 The method should raise errors.OpPrereqError in case something is
382 not fulfilled. Its return value is ignored.
384 This method should also update all the parameters of the opcode to
385 their canonical form if it hasn't been done by ExpandNames before.
388 if self.tasklets is not None:
389 for (idx, tl) in enumerate(self.tasklets):
390 logging.debug("Checking prerequisites for tasklet %s/%s",
391 idx + 1, len(self.tasklets))
392 tl.CheckPrereq()
393 else:
394 raise NotImplementedError
396 def Exec(self, feedback_fn):
397 """Execute the LU.
399 This method should implement the actual work. It should raise
400 errors.OpExecError for failures that are somewhat dealt with in
401 code, or expected.
404 if self.tasklets is not None:
405 for (idx, tl) in enumerate(self.tasklets):
406 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
407 tl.Exec(feedback_fn)
408 else:
409 raise NotImplementedError
411 def BuildHooksEnv(self):
412 """Build hooks environment for this LU.
414 This method should return a three-element tuple consisting of: a dict
415 containing the environment that will be used for running the
416 specific hook for this LU, a list of node names on which the hook
417 should run before the execution, and a list of node names on which
418 the hook should run after the execution.
420 The keys of the dict must not have 'GANETI_' prefixed as this will
421 be handled in the hooks runner. Also note additional keys will be
422 added by the hooks runner. If the LU doesn't define any
423 environment, an empty dict (and not None) should be returned.
425 "No nodes" should be returned as an empty list (and not None).
427 Note that if the HPATH for a LU class is None, this function will
428 not be called.
431 raise NotImplementedError
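# Illustrative sketch (editor's addition): a typical implementation returns
# the (env, pre_nodes, post_nodes) triple; compare LUPostInitCluster below,
# which does roughly
#
#   env = {"OP_TARGET": self.cfg.GetClusterName()}
#   mn = self.cfg.GetMasterNode()
#   return env, [], [mn]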
433 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
434 """Notify the LU about the results of its hooks.
436 This method is called every time a hooks phase is executed, and notifies
437 the Logical Unit about the hooks' result. The LU can then use it to alter
438 its result based on the hooks. By default the method does nothing and the
439 previous result is passed back unchanged but any LU can define it if it
440 wants to use the local cluster hook-scripts somehow.
442 @param phase: one of L{constants.HOOKS_PHASE_POST} or
443 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
444 @param hook_results: the results of the multi-node hooks rpc call
445 @param feedback_fn: function used to send feedback back to the caller
446 @param lu_result: the previous Exec result this LU had, or None
448 @return: the new Exec result, based on the previous result
452 # API must be kept, thus we ignore the "unused argument" and "could
453 # be a function" warnings
454 # pylint: disable-msg=W0613,R0201
457 def _ExpandAndLockInstance(self):
458 """Helper function to expand and lock an instance.
460 Many LUs that work on an instance take its name in self.op.instance_name
461 and need to expand it and then declare the expanded name for locking. This
462 function does it, and then updates self.op.instance_name to the expanded
463 name. It also initializes needed_locks as a dict, if this hasn't been done
464 before.
467 if self.needed_locks is None:
468 self.needed_locks = {}
470 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
471 "_ExpandAndLockInstance called with instance-level locks set"
472 self.op.instance_name = _ExpandInstanceName(self.cfg,
473 self.op.instance_name)
474 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
476 def _LockInstancesNodes(self, primary_only=False):
477 """Helper function to declare instances' nodes for locking.
479 This function should be called after locking one or more instances to lock
480 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
481 with all primary or secondary nodes for instances already locked and
482 present in self.needed_locks[locking.LEVEL_INSTANCE].
484 It should be called from DeclareLocks, and for safety only works if
485 self.recalculate_locks[locking.LEVEL_NODE] is set.
487 In the future it may grow parameters to just lock some instance's nodes, or
488 to just lock primaries or secondary nodes, if needed.
490 It should be called in DeclareLocks in a way similar to::
492 if level == locking.LEVEL_NODE:
493 self._LockInstancesNodes()
495 @type primary_only: boolean
496 @param primary_only: only lock primary nodes of locked instances
499 assert locking.LEVEL_NODE in self.recalculate_locks, \
500 "_LockInstancesNodes helper function called with no nodes to recalculate"
502 # TODO: check if we've really been called with the instance locks held
504 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
505 # future we might want to have different behaviors depending on the value
506 # of self.recalculate_locks[locking.LEVEL_NODE]
507 wanted_nodes = []
508 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
509 instance = self.context.cfg.GetInstanceInfo(instance_name)
510 wanted_nodes.append(instance.primary_node)
511 if not primary_only:
512 wanted_nodes.extend(instance.secondary_nodes)
514 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
515 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
516 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
517 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
519 del self.recalculate_locks[locking.LEVEL_NODE]
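# Illustrative sketch (editor's addition): the usual way an instance-level LU
# combines the two helpers above is to request a node-lock recalculation in
# ExpandNames and perform it in DeclareLocks.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()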
522 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
523 """Simple LU which runs no hooks.
525 This LU is intended as a parent for other LogicalUnits which will
526 run no hooks, in order to reduce duplicate code.
532 def BuildHooksEnv(self):
533 """Empty BuildHooksEnv for NoHooksLu.
535 This just raises an error.
538 assert False, "BuildHooksEnv called for NoHooksLUs"
542 """Tasklet base class.
544 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
545 they can mix legacy code with tasklets. Locking needs to be done in the LU,
546 tasklets know nothing about locks.
548 Subclasses must follow these rules:
549 - Implement CheckPrereq
550 - Implement Exec
553 def __init__(self, lu):
560 def CheckPrereq(self):
561 """Check prerequisites for this tasklets.
563 This method should check whether the prerequisites for the execution of
564 this tasklet are fulfilled. It can do internode communication, but it
565 should be idempotent - no cluster or system changes are allowed.
567 The method should raise errors.OpPrereqError in case something is not
568 fulfilled. Its return value is ignored.
570 This method should also update all parameters to their canonical form if it
571 hasn't been done before.
576 def Exec(self, feedback_fn):
577 """Execute the tasklet.
579 This method should implement the actual work. It should raise
580 errors.OpExecError for failures that are somewhat dealt with in code, or
581 expected.
584 raise NotImplementedError
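# Illustrative sketch (editor's addition): a minimal tasklet following the
# rules above; "TLExample" is hypothetical and the base constructor is
# assumed to keep a reference to the owning LU as self.lu (its body is not
# shown in this excerpt).
#
#   class TLExample(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance_name = _ExpandInstanceName(self.lu.cfg,
#                                                self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Would operate on %s" % self.instance_name)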
587 def _GetWantedNodes(lu, nodes):
588 """Returns list of checked and expanded node names.
590 @type lu: L{LogicalUnit}
591 @param lu: the logical unit on whose behalf we execute
593 @param nodes: list of node names or None for all nodes
595 @return: the list of nodes, sorted
596 @raise errors.ProgrammerError: if the nodes parameter is wrong type
599 if not nodes:
600 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
601 " non-empty list of nodes whose name is to be expanded.")
603 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
604 return utils.NiceSort(wanted)
607 def _GetWantedInstances(lu, instances):
608 """Returns list of checked and expanded instance names.
610 @type lu: L{LogicalUnit}
611 @param lu: the logical unit on whose behalf we execute
612 @type instances: list
613 @param instances: list of instance names or None for all instances
615 @return: the list of instances, sorted
616 @raise errors.OpPrereqError: if the instances parameter is wrong type
617 @raise errors.OpPrereqError: if any of the passed instances is not found
620 if instances:
621 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
622 else:
623 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
624 return wanted
627 def _GetUpdatedParams(old_params, update_dict,
628 use_default=True, use_none=False):
629 """Return the new version of a parameter dictionary.
631 @type old_params: dict
632 @param old_params: old parameters
633 @type update_dict: dict
634 @param update_dict: dict containing new parameter values, or
635 constants.VALUE_DEFAULT to reset the parameter to its default
637 @type use_default: boolean
638 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
639 values as 'to be deleted' values
640 @type use_none: boolean
641 @param use_none: whether to recognise C{None} values as 'to be
642 deleted' values
644 @return: the new parameter dictionary
647 params_copy = copy.deepcopy(old_params)
648 for key, val in update_dict.iteritems():
649 if ((use_default and val == constants.VALUE_DEFAULT) or
650 (use_none and val is None)):
651 try:
652 del params_copy[key]
653 except KeyError:
654 pass
655 else:
656 params_copy[key] = val
657 return params_copy
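# Illustrative example (editor's addition, hypothetical values): resetting one
# parameter to its default while updating another.
#
#   _GetUpdatedParams({"memory": 128, "vcpus": 1},
#                     {"memory": constants.VALUE_DEFAULT, "vcpus": 2})
#   # -> {"vcpus": 2}  (the "memory" key is removed, i.e. reset to default)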
660 def _CheckOutputFields(static, dynamic, selected):
661 """Checks whether all selected fields are valid.
663 @type static: L{utils.FieldSet}
664 @param static: static fields set
665 @type dynamic: L{utils.FieldSet}
666 @param dynamic: dynamic fields set
669 f = utils.FieldSet()
670 f.Extend(static)
671 f.Extend(dynamic)
673 delta = f.NonMatching(selected)
674 if delta:
675 raise errors.OpPrereqError("Unknown output fields selected: %s"
676 % ",".join(delta), errors.ECODE_INVAL)
679 def _CheckBooleanOpField(op, name):
680 """Validates boolean opcode parameters.
682 This will ensure that an opcode parameter is either a boolean value,
683 or None (but that it always exists).
686 val = getattr(op, name, None)
687 if not (val is None or isinstance(val, bool)):
688 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
689 (name, str(val)), errors.ECODE_INVAL)
690 setattr(op, name, val)
693 def _CheckGlobalHvParams(params):
694 """Validates that given hypervisor params are not global ones.
696 This will ensure that instances don't get customised versions of
697 global parameters.
700 used_globals = constants.HVC_GLOBALS.intersection(params)
701 if used_globals:
702 msg = ("The following hypervisor parameters are global and cannot"
703 " be customized at instance level, please modify them at"
704 " cluster level: %s" % utils.CommaJoin(used_globals))
705 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
708 def _CheckNodeOnline(lu, node):
709 """Ensure that a given node is online.
711 @param lu: the LU on behalf of which we make the check
712 @param node: the node to check
713 @raise errors.OpPrereqError: if the node is offline
716 if lu.cfg.GetNodeInfo(node).offline:
717 raise errors.OpPrereqError("Can't use offline node %s" % node,
721 def _CheckNodeNotDrained(lu, node):
722 """Ensure that a given node is not drained.
724 @param lu: the LU on behalf of which we make the check
725 @param node: the node to check
726 @raise errors.OpPrereqError: if the node is drained
729 if lu.cfg.GetNodeInfo(node).drained:
730 raise errors.OpPrereqError("Can't use drained node %s" % node,
734 def _CheckNodeHasOS(lu, node, os_name, force_variant):
735 """Ensure that a node supports a given OS.
737 @param lu: the LU on behalf of which we make the check
738 @param node: the node to check
739 @param os_name: the OS to query about
740 @param force_variant: whether to ignore variant errors
741 @raise errors.OpPrereqError: if the node is not supporting the OS
744 result = lu.rpc.call_os_get(node, os_name)
745 result.Raise("OS '%s' not in supported OS list for node %s" %
747 prereq=True, ecode=errors.ECODE_INVAL)
748 if not force_variant:
749 _CheckOSVariant(result.payload, os_name)
752 def _RequireFileStorage():
753 """Checks that file storage is enabled.
755 @raise errors.OpPrereqError: when file storage is disabled
758 if not constants.ENABLE_FILE_STORAGE:
759 raise errors.OpPrereqError("File storage disabled at configure time",
763 def _CheckDiskTemplate(template):
764 """Ensure a given disk template is valid.
767 if template not in constants.DISK_TEMPLATES:
768 msg = ("Invalid disk template name '%s', valid templates are: %s" %
769 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
770 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
771 if template == constants.DT_FILE:
772 _RequireFileStorage()
775 def _CheckStorageType(storage_type):
776 """Ensure a given storage type is valid.
779 if storage_type not in constants.VALID_STORAGE_TYPES:
780 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
782 if storage_type == constants.ST_FILE:
783 _RequireFileStorage()
787 def _GetClusterDomainSecret():
788 """Reads the cluster domain secret.
791 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
795 def _CheckInstanceDown(lu, instance, reason):
796 """Ensure that an instance is not running."""
797 if instance.admin_up:
798 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
799 (instance.name, reason), errors.ECODE_STATE)
801 pnode = instance.primary_node
802 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
803 ins_l.Raise("Can't contact node %s for instance information" % pnode,
804 prereq=True, ecode=errors.ECODE_ENVIRON)
806 if instance.name in ins_l.payload:
807 raise errors.OpPrereqError("Instance %s is running, %s" %
808 (instance.name, reason), errors.ECODE_STATE)
811 def _ExpandItemName(fn, name, kind):
812 """Expand an item name.
814 @param fn: the function to use for expansion
815 @param name: requested item name
816 @param kind: text description ('Node' or 'Instance')
817 @return: the resolved (full) name
818 @raise errors.OpPrereqError: if the item is not found
821 full_name = fn(name)
822 if full_name is None:
823 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
828 def _ExpandNodeName(cfg, name):
829 """Wrapper over L{_ExpandItemName} for nodes."""
830 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
833 def _ExpandInstanceName(cfg, name):
834 """Wrapper over L{_ExpandItemName} for instance."""
835 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
838 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
839 memory, vcpus, nics, disk_template, disks,
840 bep, hvp, hypervisor_name):
841 """Builds instance related env variables for hooks
843 This builds the hook environment from individual variables.
846 @param name: the name of the instance
847 @type primary_node: string
848 @param primary_node: the name of the instance's primary node
849 @type secondary_nodes: list
850 @param secondary_nodes: list of secondary nodes as strings
851 @type os_type: string
852 @param os_type: the name of the instance's OS
853 @type status: boolean
854 @param status: the should_run status of the instance
856 @param memory: the memory size of the instance
858 @param vcpus: the count of VCPUs the instance has
860 @param nics: list of tuples (ip, mac, mode, link) representing
861 the NICs the instance has
862 @type disk_template: string
863 @param disk_template: the disk template of the instance
865 @param disks: the list of (size, mode) pairs
867 @param bep: the backend parameters for the instance
869 @param hvp: the hypervisor parameters for the instance
870 @type hypervisor_name: string
871 @param hypervisor_name: the hypervisor for the instance
873 @return: the hook environment for this instance
876 if status:
877 str_status = "up"
878 else:
879 str_status = "down"
880 env = {
881 "OP_TARGET": name,
882 "INSTANCE_NAME": name,
883 "INSTANCE_PRIMARY": primary_node,
884 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
885 "INSTANCE_OS_TYPE": os_type,
886 "INSTANCE_STATUS": str_status,
887 "INSTANCE_MEMORY": memory,
888 "INSTANCE_VCPUS": vcpus,
889 "INSTANCE_DISK_TEMPLATE": disk_template,
890 "INSTANCE_HYPERVISOR": hypervisor_name,
894 nic_count = len(nics)
895 for idx, (ip, mac, mode, link) in enumerate(nics):
898 env["INSTANCE_NIC%d_IP" % idx] = ip
899 env["INSTANCE_NIC%d_MAC" % idx] = mac
900 env["INSTANCE_NIC%d_MODE" % idx] = mode
901 env["INSTANCE_NIC%d_LINK" % idx] = link
902 if mode == constants.NIC_MODE_BRIDGED:
903 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
907 env["INSTANCE_NIC_COUNT"] = nic_count
910 disk_count = len(disks)
911 for idx, (size, mode) in enumerate(disks):
912 env["INSTANCE_DISK%d_SIZE" % idx] = size
913 env["INSTANCE_DISK%d_MODE" % idx] = mode
917 env["INSTANCE_DISK_COUNT"] = disk_count
919 for source, kind in [(bep, "BE"), (hvp, "HV")]:
920 for key, value in source.items():
921 env["INSTANCE_%s_%s" % (kind, key)] = value
926 def _NICListToTuple(lu, nics):
927 """Build a list of nic information tuples.
929 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
930 value in LUQueryInstanceData.
932 @type lu: L{LogicalUnit}
933 @param lu: the logical unit on whose behalf we execute
934 @type nics: list of L{objects.NIC}
935 @param nics: list of nics to convert to hooks tuples
939 cluster = lu.cfg.GetClusterInfo()
943 filled_params = cluster.SimpleFillNIC(nic.nicparams)
944 mode = filled_params[constants.NIC_MODE]
945 link = filled_params[constants.NIC_LINK]
946 hooks_nics.append((ip, mac, mode, link))
950 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
951 """Builds instance related env variables for hooks from an object.
953 @type lu: L{LogicalUnit}
954 @param lu: the logical unit on whose behalf we execute
955 @type instance: L{objects.Instance}
956 @param instance: the instance for which we should build the
959 @param override: dictionary with key/values that will override
962 @return: the hook environment dictionary
965 cluster = lu.cfg.GetClusterInfo()
966 bep = cluster.FillBE(instance)
967 hvp = cluster.FillHV(instance)
969 'name': instance.name,
970 'primary_node': instance.primary_node,
971 'secondary_nodes': instance.secondary_nodes,
972 'os_type': instance.os,
973 'status': instance.admin_up,
974 'memory': bep[constants.BE_MEMORY],
975 'vcpus': bep[constants.BE_VCPUS],
976 'nics': _NICListToTuple(lu, instance.nics),
977 'disk_template': instance.disk_template,
978 'disks': [(disk.size, disk.mode) for disk in instance.disks],
981 'hypervisor_name': instance.hypervisor,
984 args.update(override)
985 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
988 def _AdjustCandidatePool(lu, exceptions):
989 """Adjust the candidate pool after node operations.
992 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
993 if mod_list:
994 lu.LogInfo("Promoted nodes to master candidate role: %s",
995 utils.CommaJoin(node.name for node in mod_list))
996 for name in mod_list:
997 lu.context.ReaddNode(name)
998 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
999 if mc_now > mc_max:
1000 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1001 (mc_now, mc_max))
1004 def _DecideSelfPromotion(lu, exceptions=None):
1005 """Decide whether I should promote myself as a master candidate.
1008 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1009 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1010 # the new node will increase mc_max by one, so:
1011 mc_should = min(mc_should + 1, cp_size)
1012 return mc_now < mc_should
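# Illustrative example (editor's addition, hypothetical numbers): with a
# candidate_pool_size of 10, 3 current candidates and 3 desired, adding this
# node raises the desired count to min(3 + 1, 10) = 4 > 3, so the function
# returns True and the node promotes itself.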
1015 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1016 """Check that the brigdes needed by a list of nics exist.
1019 cluster = lu.cfg.GetClusterInfo()
1020 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1021 brlist = [params[constants.NIC_LINK] for params in paramslist
1022 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1023 if brlist:
1024 result = lu.rpc.call_bridges_exist(target_node, brlist)
1025 result.Raise("Error checking bridges on destination node '%s'" %
1026 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1029 def _CheckInstanceBridgesExist(lu, instance, node=None):
1030 """Check that the brigdes needed by an instance exist.
1034 node = instance.primary_node
1035 _CheckNicsBridgesExist(lu, instance.nics, node)
1038 def _CheckOSVariant(os_obj, name):
1039 """Check whether an OS name conforms to the os variants specification.
1041 @type os_obj: L{objects.OS}
1042 @param os_obj: OS object to check
1044 @param name: OS name passed by the user, to check for validity
1047 if not os_obj.supported_variants:
1048 return
1049 try:
1050 variant = name.split("+", 1)[1]
1051 except IndexError:
1052 raise errors.OpPrereqError("OS name must include a variant",
1053 errors.ECODE_INVAL)
1055 if variant not in os_obj.supported_variants:
1056 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
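# Illustrative example (editor's addition, hypothetical names): for an OS
# object whose supported_variants is ["default"], the name "dummyos+default"
# passes the checks above, plain "dummyos" raises "OS name must include a
# variant", and "dummyos+other" raises "Unsupported OS variant".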
1059 def _GetNodeInstancesInner(cfg, fn):
1060 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1063 def _GetNodeInstances(cfg, node_name):
1064 """Returns a list of all primary and secondary instances on a node.
1068 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1071 def _GetNodePrimaryInstances(cfg, node_name):
1072 """Returns primary instances on a node.
1075 return _GetNodeInstancesInner(cfg,
1076 lambda inst: node_name == inst.primary_node)
1079 def _GetNodeSecondaryInstances(cfg, node_name):
1080 """Returns secondary instances on a node.
1083 return _GetNodeInstancesInner(cfg,
1084 lambda inst: node_name in inst.secondary_nodes)
1087 def _GetStorageTypeArgs(cfg, storage_type):
1088 """Returns the arguments for a storage type.
1091 # Special case for file storage
1092 if storage_type == constants.ST_FILE:
1093 # storage.FileStorage wants a list of storage directories
1094 return [[cfg.GetFileStorageDir()]]
1099 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1102 for dev in instance.disks:
1103 cfg.SetDiskID(dev, node_name)
1105 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1106 result.Raise("Failed to get disk status from node %s" % node_name,
1107 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1109 for idx, bdev_status in enumerate(result.payload):
1110 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1116 class LUPostInitCluster(LogicalUnit):
1117 """Logical unit for running hooks after cluster initialization.
1120 HPATH = "cluster-init"
1121 HTYPE = constants.HTYPE_CLUSTER
1124 def BuildHooksEnv(self):
1128 env = {"OP_TARGET": self.cfg.GetClusterName()}
1129 mn = self.cfg.GetMasterNode()
1130 return env, [], [mn]
1132 def Exec(self, feedback_fn):
1139 class LUDestroyCluster(LogicalUnit):
1140 """Logical unit for destroying the cluster.
1143 HPATH = "cluster-destroy"
1144 HTYPE = constants.HTYPE_CLUSTER
1147 def BuildHooksEnv(self):
1151 env = {"OP_TARGET": self.cfg.GetClusterName()}
1154 def CheckPrereq(self):
1155 """Check prerequisites.
1157 This checks whether the cluster is empty.
1159 Any errors are signaled by raising errors.OpPrereqError.
1162 master = self.cfg.GetMasterNode()
1164 nodelist = self.cfg.GetNodeList()
1165 if len(nodelist) != 1 or nodelist[0] != master:
1166 raise errors.OpPrereqError("There are still %d node(s) in"
1167 " this cluster." % (len(nodelist) - 1),
1169 instancelist = self.cfg.GetInstanceList()
1170 if instancelist:
1171 raise errors.OpPrereqError("There are still %d instance(s) in"
1172 " this cluster." % len(instancelist),
1175 def Exec(self, feedback_fn):
1176 """Destroys the cluster.
1179 master = self.cfg.GetMasterNode()
1180 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1182 # Run post hooks on master node before it's removed
1183 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1184 try:
1185 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1186 except:
1187 # pylint: disable-msg=W0702
1188 self.LogWarning("Errors occurred running hooks on %s" % master)
1190 result = self.rpc.call_node_stop_master(master, False)
1191 result.Raise("Could not disable the master role")
1193 if modify_ssh_setup:
1194 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1195 utils.CreateBackup(priv_key)
1196 utils.CreateBackup(pub_key)
1201 def _VerifyCertificate(filename):
1202 """Verifies a certificate for LUVerifyCluster.
1204 @type filename: string
1205 @param filename: Path to PEM file
1208 try:
1209 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1210 utils.ReadFile(filename))
1211 except Exception, err: # pylint: disable-msg=W0703
1212 return (LUVerifyCluster.ETYPE_ERROR,
1213 "Failed to load X509 certificate %s: %s" % (filename, err))
1216 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1217 constants.SSL_CERT_EXPIRATION_ERROR)
1220 fnamemsg = "While verifying %s: %s" % (filename, msg)
1225 return (None, fnamemsg)
1226 elif errcode == utils.CERT_WARNING:
1227 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1228 elif errcode == utils.CERT_ERROR:
1229 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1231 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
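# Illustrative sketch (editor's addition): LUVerifyCluster.Exec below consumes
# the (errcode, msg) pair roughly as
#
#   (errcode, msg) = _VerifyCertificate(cert_filename)
#   _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)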
1234 class LUVerifyCluster(LogicalUnit):
1235 """Verifies the cluster status.
1238 HPATH = "cluster-verify"
1239 HTYPE = constants.HTYPE_CLUSTER
1241 ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1242 ("verbose", _TBool),
1243 ("error_codes", _TBool),
1244 ("debug_simulate_errors", _TBool),
1248 TCLUSTER = "cluster"
1249 TNODE = "node"
1250 TINSTANCE = "instance"
1252 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1253 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1254 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1255 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1256 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1257 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1259 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1260 ENODEDRBD = (TNODE, "ENODEDRBD")
1261 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1262 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1263 ENODEHV = (TNODE, "ENODEHV")
1264 ENODELVM = (TNODE, "ENODELVM")
1265 ENODEN1 = (TNODE, "ENODEN1")
1266 ENODENET = (TNODE, "ENODENET")
1267 ENODEOS = (TNODE, "ENODEOS")
1268 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1269 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1270 ENODERPC = (TNODE, "ENODERPC")
1271 ENODESSH = (TNODE, "ENODESSH")
1272 ENODEVERSION = (TNODE, "ENODEVERSION")
1273 ENODESETUP = (TNODE, "ENODESETUP")
1274 ENODETIME = (TNODE, "ENODETIME")
1276 ETYPE_FIELD = "code"
1277 ETYPE_ERROR = "ERROR"
1278 ETYPE_WARNING = "WARNING"
1280 class NodeImage(object):
1281 """A class representing the logical and physical status of a node.
1284 @ivar name: the node name to which this object refers
1285 @ivar volumes: a structure as returned from
1286 L{ganeti.backend.GetVolumeList} (runtime)
1287 @ivar instances: a list of running instances (runtime)
1288 @ivar pinst: list of configured primary instances (config)
1289 @ivar sinst: list of configured secondary instances (config)
1290 @ivar sbp: dictionary of {primary-node: list of instances} for all peers
1291 of this node (config)
1292 @ivar mfree: free memory, as reported by hypervisor (runtime)
1293 @ivar dfree: free disk, as reported by the node (runtime)
1294 @ivar offline: the offline status (config)
1295 @type rpc_fail: boolean
1296 @ivar rpc_fail: whether the RPC verify call failed (overall,
1297 not whether the individual keys were correct) (runtime)
1298 @type lvm_fail: boolean
1299 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1300 @type hyp_fail: boolean
1301 @ivar hyp_fail: whether the RPC call didn't return the instance list
1302 @type ghost: boolean
1303 @ivar ghost: whether this is a known node or not (config)
1304 @type os_fail: boolean
1305 @ivar os_fail: whether the RPC call didn't return valid OS data
1307 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1310 def __init__(self, offline=False, name=None):
1311 self.name = name
1312 self.volumes = {}
1313 self.instances = []
1314 self.pinst = []
1315 self.sinst = []
1316 self.sbp = {}
1317 self.mfree = 0
1318 self.dfree = 0
1319 self.offline = offline
1320 self.rpc_fail = False
1321 self.lvm_fail = False
1322 self.hyp_fail = False
1323 self.ghost = False
1324 self.os_fail = False
1325 self.oslist = {}
1327 def ExpandNames(self):
1328 self.needed_locks = {
1329 locking.LEVEL_NODE: locking.ALL_SET,
1330 locking.LEVEL_INSTANCE: locking.ALL_SET,
1332 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1334 def _Error(self, ecode, item, msg, *args, **kwargs):
1335 """Format an error message.
1337 Based on the opcode's error_codes parameter, either format a
1338 parseable error code, or a simpler error string.
1340 This must be called only from Exec and functions called from Exec.
1343 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1345 # first complete the msg
1348 # then format the whole message
1349 if self.op.error_codes:
1350 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1356 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1357 # and finally report it via the feedback_fn
1358 self._feedback_fn(" - %s" % msg)
1360 def _ErrorIf(self, cond, *args, **kwargs):
1361 """Log an error message if the passed condition is True.
1364 cond = bool(cond) or self.op.debug_simulate_errors
1365 if cond:
1366 self._Error(*args, **kwargs)
1367 # do not mark the operation as failed for WARN cases only
1368 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1369 self.bad = self.bad or cond
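# Illustrative sketch (editor's addition): the typical call pattern used by
# the verification helpers below is
#
#   _ErrorIf = self._ErrorIf
#   _ErrorIf(test, self.ENODERPC, node,
#            "unable to verify node: no data returned")
#
# where warning-only conditions additionally pass code=self.ETYPE_WARNING so
# that they do not mark the whole verification as failed.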
1371 def _VerifyNode(self, ninfo, nresult):
1372 """Run multiple tests against a node.
1376 - compares ganeti version
1377 - checks vg existence and size > 20G
1378 - checks config file checksum
1379 - checks ssh to other nodes
1381 @type ninfo: L{objects.Node}
1382 @param ninfo: the node to check
1383 @param nresult: the results from the node
1385 @return: whether overall this call was successful (and we can expect
1386 reasonable values in the response)
1390 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1392 # main result, nresult should be a non-empty dict
1393 test = not nresult or not isinstance(nresult, dict)
1394 _ErrorIf(test, self.ENODERPC, node,
1395 "unable to verify node: no data returned")
1399 # compares ganeti version
1400 local_version = constants.PROTOCOL_VERSION
1401 remote_version = nresult.get("version", None)
1402 test = not (remote_version and
1403 isinstance(remote_version, (list, tuple)) and
1404 len(remote_version) == 2)
1405 _ErrorIf(test, self.ENODERPC, node,
1406 "connection to node returned invalid data")
1410 test = local_version != remote_version[0]
1411 _ErrorIf(test, self.ENODEVERSION, node,
1412 "incompatible protocol versions: master %s,"
1413 " node %s", local_version, remote_version[0])
1417 # node seems compatible, we can actually try to look into its results
1419 # full package version
1420 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1421 self.ENODEVERSION, node,
1422 "software version mismatch: master %s, node %s",
1423 constants.RELEASE_VERSION, remote_version[1],
1424 code=self.ETYPE_WARNING)
1426 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1427 if isinstance(hyp_result, dict):
1428 for hv_name, hv_result in hyp_result.iteritems():
1429 test = hv_result is not None
1430 _ErrorIf(test, self.ENODEHV, node,
1431 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1434 test = nresult.get(constants.NV_NODESETUP,
1435 ["Missing NODESETUP results"])
1436 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1441 def _VerifyNodeTime(self, ninfo, nresult,
1442 nvinfo_starttime, nvinfo_endtime):
1443 """Check the node time.
1445 @type ninfo: L{objects.Node}
1446 @param ninfo: the node to check
1447 @param nresult: the remote results for the node
1448 @param nvinfo_starttime: the start time of the RPC call
1449 @param nvinfo_endtime: the end time of the RPC call
1453 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1455 ntime = nresult.get(constants.NV_TIME, None)
1456 try:
1457 ntime_merged = utils.MergeTime(ntime)
1458 except (ValueError, TypeError):
1459 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1462 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1463 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1464 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1465 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1466 else:
1467 ntime_diff = None
1469 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1470 "Node time diverges by at least %s from master node time",
1471 ntime_diff)
1473 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1474 """Check the node time.
1476 @type ninfo: L{objects.Node}
1477 @param ninfo: the node to check
1478 @param nresult: the remote results for the node
1479 @param vg_name: the configured VG name
1486 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1488 # checks vg existence and size > 20G
1489 vglist = nresult.get(constants.NV_VGLIST, None)
1490 test = vglist is None
1491 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1492 if not test:
1493 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1494 constants.MIN_VG_SIZE)
1495 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1498 pvlist = nresult.get(constants.NV_PVLIST, None)
1499 test = pvlist is None
1500 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1502 # check that ':' is not present in PV names, since it's a
1503 # special character for lvcreate (denotes the range of PEs to
1505 for _, pvname, owner_vg in pvlist:
1506 test = ":" in pvname
1507 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1508 " '%s' of VG '%s'", pvname, owner_vg)
1510 def _VerifyNodeNetwork(self, ninfo, nresult):
1511 """Check the node time.
1513 @type ninfo: L{objects.Node}
1514 @param ninfo: the node to check
1515 @param nresult: the remote results for the node
1519 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1521 test = constants.NV_NODELIST not in nresult
1522 _ErrorIf(test, self.ENODESSH, node,
1523 "node hasn't returned node ssh connectivity data")
1525 if nresult[constants.NV_NODELIST]:
1526 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1527 _ErrorIf(True, self.ENODESSH, node,
1528 "ssh communication with node '%s': %s", a_node, a_msg)
1530 test = constants.NV_NODENETTEST not in nresult
1531 _ErrorIf(test, self.ENODENET, node,
1532 "node hasn't returned node tcp connectivity data")
1534 if nresult[constants.NV_NODENETTEST]:
1535 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1536 for anode in nlist:
1537 _ErrorIf(True, self.ENODENET, node,
1538 "tcp communication with node '%s': %s",
1539 anode, nresult[constants.NV_NODENETTEST][anode])
1541 test = constants.NV_MASTERIP not in nresult
1542 _ErrorIf(test, self.ENODENET, node,
1543 "node hasn't returned node master IP reachability data")
1545 if not nresult[constants.NV_MASTERIP]:
1546 if node == self.master_node:
1547 msg = "the master node cannot reach the master IP (not configured?)"
1549 msg = "cannot reach the master IP"
1550 _ErrorIf(True, self.ENODENET, node, msg)
1553 def _VerifyInstance(self, instance, instanceconfig, node_image):
1554 """Verify an instance.
1556 This function checks to see if the required block devices are
1557 available on the instance's node.
1560 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1561 node_current = instanceconfig.primary_node
1563 node_vol_should = {}
1564 instanceconfig.MapLVsByNode(node_vol_should)
1566 for node in node_vol_should:
1567 n_img = node_image[node]
1568 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1569 # ignore missing volumes on offline or broken nodes
1570 continue
1571 for volume in node_vol_should[node]:
1572 test = volume not in n_img.volumes
1573 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1574 "volume %s missing on node %s", volume, node)
1576 if instanceconfig.admin_up:
1577 pri_img = node_image[node_current]
1578 test = instance not in pri_img.instances and not pri_img.offline
1579 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1580 "instance not running on its primary node %s",
1583 for node, n_img in node_image.items():
1584 if (not node == node_current):
1585 test = instance in n_img.instances
1586 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1587 "instance should not run on node %s", node)
1589 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1590 """Verify if there are any unknown volumes in the cluster.
1592 The .os, .swap and backup volumes are ignored. All other volumes are
1593 reported as unknown.
1596 for node, n_img in node_image.items():
1597 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1598 # skip non-healthy nodes
1599 continue
1600 for volume in n_img.volumes:
1601 test = (node not in node_vol_should or
1602 volume not in node_vol_should[node])
1603 self._ErrorIf(test, self.ENODEORPHANLV, node,
1604 "volume %s is unknown", volume)
1606 def _VerifyOrphanInstances(self, instancelist, node_image):
1607 """Verify the list of running instances.
1609 This checks what instances are running but unknown to the cluster.
1612 for node, n_img in node_image.items():
1613 for o_inst in n_img.instances:
1614 test = o_inst not in instancelist
1615 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1616 "instance %s on node %s should not exist", o_inst, node)
1618 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1619 """Verify N+1 Memory Resilience.
1621 Check that if one single node dies we can still start all the
1622 instances it was primary for.
1625 for node, n_img in node_image.items():
1626 # This code checks that every node which is now listed as
1627 # secondary has enough memory to host all instances it is
1628 # supposed to should a single other node in the cluster fail.
1629 # FIXME: not ready for failover to an arbitrary node
1630 # FIXME: does not support file-backed instances
1631 # WARNING: we currently take into account down instances as well
1632 # as up ones, considering that even if they're down someone
1633 # might want to start them even in the event of a node failure.
1634 for prinode, instances in n_img.sbp.items():
1635 needed_mem = 0
1636 for instance in instances:
1637 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1638 if bep[constants.BE_AUTO_BALANCE]:
1639 needed_mem += bep[constants.BE_MEMORY]
1640 test = n_img.mfree < needed_mem
1641 self._ErrorIf(test, self.ENODEN1, node,
1642 "not enough memory on to accommodate"
1643 " failovers should peer node %s fail", prinode)
1645 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1646 master_files):
1647 """Verifies and computes the node required file checksums.
1649 @type ninfo: L{objects.Node}
1650 @param ninfo: the node to check
1651 @param nresult: the remote results for the node
1652 @param file_list: required list of files
1653 @param local_cksum: dictionary of local files and their checksums
1654 @param master_files: list of files that only masters should have
1658 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1660 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1661 test = not isinstance(remote_cksum, dict)
1662 _ErrorIf(test, self.ENODEFILECHECK, node,
1663 "node hasn't returned file checksum data")
1667 for file_name in file_list:
1668 node_is_mc = ninfo.master_candidate
1669 must_have = (file_name not in master_files) or node_is_mc
1671 test1 = file_name not in remote_cksum
1673 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1675 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1676 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1677 "file '%s' missing", file_name)
1678 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1679 "file '%s' has wrong checksum", file_name)
1680 # not candidate and this is not a must-have file
1681 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1682 "file '%s' should not exist on non master"
1683 " candidates (and the file is outdated)", file_name)
1684 # all good, except non-master/non-must have combination
1685 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1686 "file '%s' should not exist"
1687 " on non master candidates", file_name)
1689 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1690 """Verifies and the node DRBD status.
1692 @type ninfo: L{objects.Node}
1693 @param ninfo: the node to check
1694 @param nresult: the remote results for the node
1695 @param instanceinfo: the dict of instances
1696 @param drbd_map: the DRBD map as returned by
1697 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1701 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1703 # compute the DRBD minors
1704 node_drbd = {}
1705 for minor, instance in drbd_map[node].items():
1706 test = instance not in instanceinfo
1707 _ErrorIf(test, self.ECLUSTERCFG, None,
1708 "ghost instance '%s' in temporary DRBD map", instance)
1709 # ghost instance should not be running, but otherwise we
1710 # don't give double warnings (both ghost instance and
1711 # unallocated minor in use)
1712 if test:
1713 node_drbd[minor] = (instance, False)
1714 else:
1715 instance = instanceinfo[instance]
1716 node_drbd[minor] = (instance.name, instance.admin_up)
1718 # and now check them
1719 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1720 test = not isinstance(used_minors, (tuple, list))
1721 _ErrorIf(test, self.ENODEDRBD, node,
1722 "cannot parse drbd status file: %s", str(used_minors))
1724 # we cannot check drbd status
1727 for minor, (iname, must_exist) in node_drbd.items():
1728 test = minor not in used_minors and must_exist
1729 _ErrorIf(test, self.ENODEDRBD, node,
1730 "drbd minor %d of instance %s is not active", minor, iname)
1731 for minor in used_minors:
1732 test = minor not in node_drbd
1733 _ErrorIf(test, self.ENODEDRBD, node,
1734 "unallocated drbd minor %d is in use", minor)
1736 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1737 """Builds the node OS structures.
1739 @type ninfo: L{objects.Node}
1740 @param ninfo: the node to check
1741 @param nresult: the remote results for the node
1742 @param nimg: the node image object
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_os = nresult.get(constants.NV_OSLIST, None)
1749 test = (not isinstance(remote_os, list) or
1750 not compat.all(isinstance(v, list) and len(v) == 7
1751 for v in remote_os))
1753 _ErrorIf(test, self.ENODEOS, node,
1754 "node hasn't returned valid OS data")
1763 for (name, os_path, status, diagnose,
1764 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1766 if name not in os_dict:
1769 # parameters is a list of lists instead of list of tuples due to
1770 # JSON lacking a real tuple type, fix it:
1771 parameters = [tuple(v) for v in parameters]
1772 os_dict[name].append((os_path, status, diagnose,
1773 set(variants), set(parameters), set(api_ver)))
1775 nimg.oslist = os_dict
1777 def _VerifyNodeOS(self, ninfo, nimg, base):
1778 """Verifies the node OS list.
1780 @type ninfo: L{objects.Node}
1781 @param ninfo: the node to check
1782 @param nimg: the node image object
1783 @param base: the 'template' node we match against (e.g. from the master)
1787 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1789 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1791 for os_name, os_data in nimg.oslist.items():
1792 assert os_data, "Empty OS status for OS %s?!" % os_name
1793 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1794 _ErrorIf(not f_status, self.ENODEOS, node,
1795 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1796 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1797 "OS '%s' has multiple entries (first one shadows the rest): %s",
1798 os_name, utils.CommaJoin([v[0] for v in os_data]))
1799 # this will be caught in the backend too
1800 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1801 and not f_var, self.ENODEOS, node,
1802 "OS %s with API at least %d does not declare any variant",
1803 os_name, constants.OS_API_V15)
1804 # comparisons with the 'base' image
1805 test = os_name not in base.oslist
1806 _ErrorIf(test, self.ENODEOS, node,
1807 "Extra OS %s not present on reference node (%s)",
1811 assert base.oslist[os_name], "Base node has empty OS status?"
1812 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1813 if not b_status:
1814 # base OS is invalid, skipping
1815 continue
1816 for kind, a, b in [("API version", f_api, b_api),
1817 ("variants list", f_var, b_var),
1818 ("parameters", f_param, b_param)]:
1819 _ErrorIf(a != b, self.ENODEOS, node,
1820 "OS %s %s differs from reference node %s: %s vs. %s",
1821 kind, os_name, base.name,
1822 utils.CommaJoin(a), utils.CommaJoin(b))
1824 # check any missing OSes
1825 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1826 _ErrorIf(missing, self.ENODEOS, node,
1827 "OSes present on reference node %s but missing on this node: %s",
1828 base.name, utils.CommaJoin(missing))
1830 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1831 """Verifies and updates the node volume data.
1833 This function will update a L{NodeImage}'s internal structures
1834 with data from the remote call.
1836 @type ninfo: L{objects.Node}
1837 @param ninfo: the node to check
1838 @param nresult: the remote results for the node
1839 @param nimg: the node image object
1840 @param vg_name: the configured VG name
1844 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1846 nimg.lvm_fail = True
1847 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1848 if vg_name is None:
1849 pass
1850 elif isinstance(lvdata, basestring):
1851 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1852 utils.SafeEncode(lvdata))
1853 elif not isinstance(lvdata, dict):
1854 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1856 nimg.volumes = lvdata
1857 nimg.lvm_fail = False
1859 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1860 """Verifies and updates the node instance list.
1862 If the listing was successful, then updates this node's instance
1863 list. Otherwise, it marks the RPC call as failed for the instance
1866 @type ninfo: L{objects.Node}
1867 @param ninfo: the node to check
1868 @param nresult: the remote results for the node
1869 @param nimg: the node image object
1872 idata = nresult.get(constants.NV_INSTANCELIST, None)
1873 test = not isinstance(idata, list)
1874 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1875 " (instancelist): %s", utils.SafeEncode(str(idata)))
1877 nimg.hyp_fail = True
1879 nimg.instances = idata
1881 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1882 """Verifies and computes a node information map
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nresult: the remote results for the node
1887 @param nimg: the node image object
1888 @param vg_name: the configured VG name
1892 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1894 # try to read free memory (from the hypervisor)
1895 hv_info = nresult.get(constants.NV_HVINFO, None)
1896 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1897 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1898 if not test:
1899 try:
1900 nimg.mfree = int(hv_info["memory_free"])
1901 except (ValueError, TypeError):
1902 _ErrorIf(True, self.ENODERPC, node,
1903 "node returned invalid nodeinfo, check hypervisor")
1905 # FIXME: devise a free space model for file based instances as well
1906 if vg_name is not None:
1907 test = (constants.NV_VGLIST not in nresult or
1908 vg_name not in nresult[constants.NV_VGLIST])
1909 _ErrorIf(test, self.ENODELVM, node,
1910 "node didn't return data for the volume group '%s'"
1911 " - it is either missing or broken", vg_name)
1914 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1915 except (ValueError, TypeError):
1916 _ErrorIf(True, self.ENODERPC, node,
1917 "node returned invalid LVM info, check LVM status")
1919 def BuildHooksEnv(self):
1922 Cluster-Verify hooks are run only in the post phase; if they fail, their
1923 output is logged in the verify output and the verification fails.
1926 all_nodes = self.cfg.GetNodeList()
1927 env = {
1928 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1929 }
1930 for node in self.cfg.GetAllNodesInfo().values():
1931 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1933 return env, [], all_nodes
1935 def Exec(self, feedback_fn):
1936 """Verify integrity of cluster, performing various test on nodes.
1940 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1941 verbose = self.op.verbose
1942 self._feedback_fn = feedback_fn
1943 feedback_fn("* Verifying global settings")
1944 for msg in self.cfg.VerifyConfig():
1945 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1947 # Check the cluster certificates
1948 for cert_filename in constants.ALL_CERT_FILES:
1949 (errcode, msg) = _VerifyCertificate(cert_filename)
1950 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1952 vg_name = self.cfg.GetVGName()
1953 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1954 cluster = self.cfg.GetClusterInfo()
1955 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1956 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1957 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1958 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1959 for iname in instancelist)
1960 i_non_redundant = [] # Non redundant instances
1961 i_non_a_balanced = [] # Non auto-balanced instances
1962 n_offline = 0 # Count of offline nodes
1963 n_drained = 0 # Count of nodes being drained
1964 node_vol_should = {}
1966 # FIXME: verify OS list
1967 # do local checksums
1968 master_files = [constants.CLUSTER_CONF_FILE]
1969 master_node = self.master_node = self.cfg.GetMasterNode()
1970 master_ip = self.cfg.GetMasterIP()
1972 file_names = ssconf.SimpleStore().GetFileList()
1973 file_names.extend(constants.ALL_CERT_FILES)
1974 file_names.extend(master_files)
1975 if cluster.modify_etc_hosts:
1976 file_names.append(constants.ETC_HOSTS)
1978 local_checksums = utils.FingerprintFiles(file_names)
1980 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1981 node_verify_param = {
1982 constants.NV_FILELIST: file_names,
1983 constants.NV_NODELIST: [node.name for node in nodeinfo
1984 if not node.offline],
1985 constants.NV_HYPERVISOR: hypervisors,
1986 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1987 node.secondary_ip) for node in nodeinfo
1988 if not node.offline],
1989 constants.NV_INSTANCELIST: hypervisors,
1990 constants.NV_VERSION: None,
1991 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1992 constants.NV_NODESETUP: None,
1993 constants.NV_TIME: None,
1994 constants.NV_MASTERIP: (master_node, master_ip),
1995 constants.NV_OSLIST: None,
1998 if vg_name is not None:
1999 node_verify_param[constants.NV_VGLIST] = None
2000 node_verify_param[constants.NV_LVLIST] = vg_name
2001 node_verify_param[constants.NV_PVLIST] = [vg_name]
2002 node_verify_param[constants.NV_DRBDLIST] = None
2004 # Build our expected cluster state
2005 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2007 for node in nodeinfo)
2009 for instance in instancelist:
2010 inst_config = instanceinfo[instance]
2012 for nname in inst_config.all_nodes:
2013 if nname not in node_image:
2015 gnode = self.NodeImage(name=nname)
2017 node_image[nname] = gnode
2019 inst_config.MapLVsByNode(node_vol_should)
2021 pnode = inst_config.primary_node
2022 node_image[pnode].pinst.append(instance)
2024 for snode in inst_config.secondary_nodes:
2025 nimg = node_image[snode]
2026 nimg.sinst.append(instance)
2027 if pnode not in nimg.sbp:
2028 nimg.sbp[pnode] = []
2029 nimg.sbp[pnode].append(instance)
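# Sketch of the expected-state structures built above (instance and node
# names are hypothetical): node_image["nodeB"].pinst lists instances whose
# primary is nodeB, node_image["nodeB"].sinst lists instances for which it
# is a secondary, and node_image["nodeB"].sbp == {"nodeA": ["inst1"]} means
# nodeB holds the secondary copy of inst1 whose primary node is nodeA.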
2031 # At this point, we have the in-memory data structures complete,
2032 # except for the runtime information, which we'll gather next
2034 # Due to the way our RPC system works, exact response times cannot be
2035 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2036 time before and after executing the request, we can at least have a time window.
2038 nvinfo_starttime = time.time()
2039 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2040 self.cfg.GetClusterName())
2041 nvinfo_endtime = time.time()
2043 all_drbd_map = self.cfg.ComputeDRBDMap()
2045 feedback_fn("* Verifying node status")
2049 for node_i in nodeinfo:
2051 nimg = node_image[node]
2055 feedback_fn("* Skipping offline node %s" % (node,))
2059 if node == master_node:
2061 elif node_i.master_candidate:
2062 ntype = "master candidate"
2063 elif node_i.drained:
2069 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2071 msg = all_nvinfo[node].fail_msg
2072 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2074 nimg.rpc_fail = True
2077 nresult = all_nvinfo[node].payload
2079 nimg.call_ok = self._VerifyNode(node_i, nresult)
2080 self._VerifyNodeNetwork(node_i, nresult)
2081 self._VerifyNodeLVM(node_i, nresult, vg_name)
2082 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2084 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2085 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2087 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2088 self._UpdateNodeInstances(node_i, nresult, nimg)
2089 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2090 self._UpdateNodeOS(node_i, nresult, nimg)
2091 if not nimg.os_fail:
2092 if refos_img is None:
2094 self._VerifyNodeOS(node_i, nimg, refos_img)
2096 feedback_fn("* Verifying instance status")
2097 for instance in instancelist:
2099 feedback_fn("* Verifying instance %s" % instance)
2100 inst_config = instanceinfo[instance]
2101 self._VerifyInstance(instance, inst_config, node_image)
2102 inst_nodes_offline = []
2104 pnode = inst_config.primary_node
2105 pnode_img = node_image[pnode]
2106 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2107 self.ENODERPC, pnode, "instance %s, connection to"
2108 " primary node failed", instance)
2110 if pnode_img.offline:
2111 inst_nodes_offline.append(pnode)
2113 # If the instance is non-redundant we cannot survive losing its primary
2114 # node, so we are not N+1 compliant. On the other hand we have no disk
2115 templates with more than one secondary, so that situation is not well handled currently.
2117 # FIXME: does not support file-backed instances
2118 if not inst_config.secondary_nodes:
2119 i_non_redundant.append(instance)
2120 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2121 instance, "instance has multiple secondary nodes: %s",
2122 utils.CommaJoin(inst_config.secondary_nodes),
2123 code=self.ETYPE_WARNING)
2125 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2126 i_non_a_balanced.append(instance)
2128 for snode in inst_config.secondary_nodes:
2129 s_img = node_image[snode]
2130 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2131 "instance %s, connection to secondary node failed", instance)
2134 inst_nodes_offline.append(snode)
2136 # warn that the instance lives on offline nodes
2137 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2138 "instance lives on offline node(s) %s",
2139 utils.CommaJoin(inst_nodes_offline))
2140 # ... or ghost nodes
2141 for node in inst_config.all_nodes:
2142 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2143 "instance lives on ghost node %s", node)
2145 feedback_fn("* Verifying orphan volumes")
2146 self._VerifyOrphanVolumes(node_vol_should, node_image)
2148 feedback_fn("* Verifying orphan instances")
2149 self._VerifyOrphanInstances(instancelist, node_image)
2151 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2152 feedback_fn("* Verifying N+1 Memory redundancy")
2153 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2155 feedback_fn("* Other Notes")
2157 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2158 % len(i_non_redundant))
2160 if i_non_a_balanced:
2161 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2162 % len(i_non_a_balanced))
2165 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2168 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2172 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2173 """Analyze the post-hooks' result
2175 This method analyzes the hook result, handles it, and sends some
2176 nicely-formatted feedback back to the user.
2178 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2179 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2180 @param hooks_results: the results of the multi-node hooks rpc call
2181 @param feedback_fn: function used to send feedback back to the caller
2182 @param lu_result: previous Exec result
2183 @return: the new Exec result, based on the previous result
2187 # We only really run POST phase hooks, and are only interested in their results
2189 if phase == constants.HOOKS_PHASE_POST:
2190 # Used to change hooks' output to proper indentation
2191 indent_re = re.compile('^', re.M)
2192 feedback_fn("* Hooks Results")
2193 assert hooks_results, "invalid result from hooks"
2195 for node_name in hooks_results:
2196 res = hooks_results[node_name]
2198 test = msg and not res.offline
2199 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2200 "Communication failure in hooks execution: %s", msg)
2201 if res.offline or msg:
2202 # No need to investigate payload if node is offline or gave an error.
2203 # manually override lu_result here, as _ErrorIf only
2204 # overrides self.bad
2207 for script, hkr, output in res.payload:
2208 test = hkr == constants.HKR_FAIL
2209 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2210 "Script %s failed, output:", script)
2212 output = indent_re.sub(' ', output)
2213 feedback_fn("%s" % output)
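# For reference, res.payload is a list of (script, status, output) tuples;
# a made-up example: [("10check-disks", constants.HKR_SUCCESS, ""),
# ("99custom", constants.HKR_FAIL, "custom check failed")]. Only HKR_FAIL
# entries are reported as errors above; their output is indented and echoed.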
2219 class LUVerifyDisks(NoHooksLU):
2220 """Verifies the cluster disks status.
2226 def ExpandNames(self):
2227 self.needed_locks = {
2228 locking.LEVEL_NODE: locking.ALL_SET,
2229 locking.LEVEL_INSTANCE: locking.ALL_SET,
2231 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2233 def Exec(self, feedback_fn):
2234 """Verify integrity of cluster disks.
2236 @rtype: tuple of three items
2237 @return: a tuple of (dict of node-to-node_error, list of instances
2238 which need activate-disks, dict of instance: (node, volume) for
2242 result = res_nodes, res_instances, res_missing = {}, [], {}
2244 vg_name = self.cfg.GetVGName()
2245 nodes = utils.NiceSort(self.cfg.GetNodeList())
2246 instances = [self.cfg.GetInstanceInfo(name)
2247 for name in self.cfg.GetInstanceList()]
2250 for inst in instances:
2252 if (not inst.admin_up or
2253 inst.disk_template not in constants.DTS_NET_MIRROR):
2255 inst.MapLVsByNode(inst_lvs)
2256 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2257 for node, vol_list in inst_lvs.iteritems():
2258 for vol in vol_list:
2259 nv_dict[(node, vol)] = inst
2264 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2268 node_res = node_lvs[node]
2269 if node_res.offline:
2271 msg = node_res.fail_msg
2273 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2274 res_nodes[node] = msg
2277 lvs = node_res.payload
2278 for lv_name, (_, _, lv_online) in lvs.items():
2279 inst = nv_dict.pop((node, lv_name), None)
2280 if (not lv_online and inst is not None
2281 and inst.name not in res_instances):
2282 res_instances.append(inst.name)
2284 # any leftover items in nv_dict are missing LVs, so let's arrange the data better
2286 for key, inst in nv_dict.iteritems():
2287 if inst.name not in res_missing:
2288 res_missing[inst.name] = []
2289 res_missing[inst.name].append(key)
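# Illustrative shape of the result tuple assembled above (all names and
# values below are made up):
#   res_nodes     -> {"node3.example.com": "rpc failure"}        # node errors
#   res_instances -> ["instance1.example.com"]                   # need activate-disks
#   res_missing   -> {"instance2.example.com": [("node1.example.com", "xenvg/1a2b3c.disk0_data")]}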
2294 class LURepairDiskSizes(NoHooksLU):
2295 """Verifies the cluster disks sizes.
2298 _OP_REQP = [("instances", _TListOf(_TNonEmptyString))]
2301 def ExpandNames(self):
2302 if self.op.instances:
2303 self.wanted_names = []
2304 for name in self.op.instances:
2305 full_name = _ExpandInstanceName(self.cfg, name)
2306 self.wanted_names.append(full_name)
2307 self.needed_locks = {
2308 locking.LEVEL_NODE: [],
2309 locking.LEVEL_INSTANCE: self.wanted_names,
2311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2313 self.wanted_names = None
2314 self.needed_locks = {
2315 locking.LEVEL_NODE: locking.ALL_SET,
2316 locking.LEVEL_INSTANCE: locking.ALL_SET,
2318 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2320 def DeclareLocks(self, level):
2321 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2322 self._LockInstancesNodes(primary_only=True)
2324 def CheckPrereq(self):
2325 """Check prerequisites.
2327 This only checks the optional instance list against the existing names.
2330 if self.wanted_names is None:
2331 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2333 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2334 in self.wanted_names]
2336 def _EnsureChildSizes(self, disk):
2337 """Ensure children of the disk have the needed disk size.
2339 This is valid mainly for DRBD8 and fixes an issue where the
2340 children have a smaller disk size.
2342 @param disk: an L{ganeti.objects.Disk} object
2345 if disk.dev_type == constants.LD_DRBD8:
2346 assert disk.children, "Empty children for DRBD8?"
2347 fchild = disk.children[0]
2348 mismatch = fchild.size < disk.size
2350 self.LogInfo("Child disk has size %d, parent %d, fixing",
2351 fchild.size, disk.size)
2352 fchild.size = disk.size
2354 # and we recurse on this child only, not on the metadev
2355 return self._EnsureChildSizes(fchild) or mismatch
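  # Hypothetical walk-through: a DRBD8 disk recorded at 10240 MiB whose data
  # child reports 10236 MiB gets the child grown (in the configuration) to
  # 10240, and the method returns True so the caller knows to save the config.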
2359 def Exec(self, feedback_fn):
2360 """Verify the size of cluster disks.
2363 # TODO: check child disks too
2364 # TODO: check differences in size between primary/secondary nodes
2366 for instance in self.wanted_instances:
2367 pnode = instance.primary_node
2368 if pnode not in per_node_disks:
2369 per_node_disks[pnode] = []
2370 for idx, disk in enumerate(instance.disks):
2371 per_node_disks[pnode].append((instance, idx, disk))
2374 for node, dskl in per_node_disks.items():
2375 newl = [v[2].Copy() for v in dskl]
2377 self.cfg.SetDiskID(dsk, node)
2378 result = self.rpc.call_blockdev_getsizes(node, newl)
2380 self.LogWarning("Failure in blockdev_getsizes call to node"
2381 " %s, ignoring", node)
2383 if len(result.data) != len(dskl):
2384 self.LogWarning("Invalid result from node %s, ignoring node results",
2387 for ((instance, idx, disk), size) in zip(dskl, result.data):
2389 self.LogWarning("Disk %d of instance %s did not return size"
2390 " information, ignoring", idx, instance.name)
2392 if not isinstance(size, (int, long)):
2393 self.LogWarning("Disk %d of instance %s did not return valid"
2394 " size information, ignoring", idx, instance.name)
2397 if size != disk.size:
2398 self.LogInfo("Disk %d of instance %s has mismatched size,"
2399 " correcting: recorded %d, actual %d", idx,
2400 instance.name, disk.size, size)
2402 self.cfg.Update(instance, feedback_fn)
2403 changed.append((instance.name, idx, size))
2404 if self._EnsureChildSizes(disk):
2405 self.cfg.Update(instance, feedback_fn)
2406 changed.append((instance.name, idx, disk.size))
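    # The accumulated result is a list of (instance_name, disk_index, new_size)
    # tuples, e.g. [("inst1.example.com", 0, 10240)]; names and sizes here are
    # illustrative only.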
2410 class LURenameCluster(LogicalUnit):
2411 """Rename the cluster.
2414 HPATH = "cluster-rename"
2415 HTYPE = constants.HTYPE_CLUSTER
2416 _OP_REQP = [("name", _TNonEmptyString)]
2418 def BuildHooksEnv(self):
2423 "OP_TARGET": self.cfg.GetClusterName(),
2424 "NEW_NAME": self.op.name,
2426 mn = self.cfg.GetMasterNode()
2427 all_nodes = self.cfg.GetNodeList()
2428 return env, [mn], all_nodes
2430 def CheckPrereq(self):
2431 """Verify that the passed name is a valid one.
2434 hostname = utils.GetHostInfo(self.op.name)
2436 new_name = hostname.name
2437 self.ip = new_ip = hostname.ip
2438 old_name = self.cfg.GetClusterName()
2439 old_ip = self.cfg.GetMasterIP()
2440 if new_name == old_name and new_ip == old_ip:
2441 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2442 " cluster has changed",
2444 if new_ip != old_ip:
2445 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2446 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2447 " reachable on the network. Aborting." %
2448 new_ip, errors.ECODE_NOTUNIQUE)
2450 self.op.name = new_name
2452 def Exec(self, feedback_fn):
2453 """Rename the cluster.
2456 clustername = self.op.name
2457 ip = self.ip
2459 # shutdown the master IP
2460 master = self.cfg.GetMasterNode()
2461 result = self.rpc.call_node_stop_master(master, False)
2462 result.Raise("Could not disable the master role")
2465 cluster = self.cfg.GetClusterInfo()
2466 cluster.cluster_name = clustername
2467 cluster.master_ip = ip
2468 self.cfg.Update(cluster, feedback_fn)
2470 # update the known hosts file
2471 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2472 node_list = self.cfg.GetNodeList()
2474 node_list.remove(master)
2477 result = self.rpc.call_upload_file(node_list,
2478 constants.SSH_KNOWN_HOSTS_FILE)
2479 for to_node, to_result in result.iteritems():
2480 msg = to_result.fail_msg
2482 msg = ("Copy of file %s to node %s failed: %s" %
2483 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2484 self.proc.LogWarning(msg)
2487 result = self.rpc.call_node_start_master(master, False, False)
2488 msg = result.fail_msg
2490 self.LogWarning("Could not re-enable the master role on"
2491 " the master, please restart manually: %s", msg)
2494 def _RecursiveCheckIfLVMBased(disk):
2495 """Check if the given disk or its children are lvm-based.
2497 @type disk: L{objects.Disk}
2498 @param disk: the disk to check
2500 @return: boolean indicating whether a LD_LV dev_type was found or not
2504 for chdisk in disk.children:
2505 if _RecursiveCheckIfLVMBased(chdisk):
2507 return disk.dev_type == constants.LD_LV
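# Illustrative behaviour: a plain LV disk, or a DRBD8 disk backed by LV
# children, makes this return True, while a file-based disk with no LV
# children returns False; this is what CheckPrereq below relies on when
# refusing to disable LVM storage.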
2510 class LUSetClusterParams(LogicalUnit):
2511 """Change the parameters of the cluster.
2514 HPATH = "cluster-modify"
2515 HTYPE = constants.HTYPE_CLUSTER
2517 ("hvparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2518 ("os_hvp", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2519 ("osparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2520 ("enabled_hypervisors",
2521 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2524 ("candidate_pool_size", None),
2527 ("remove_uids", None),
2534 def CheckArguments(self):
2538 if self.op.candidate_pool_size is not None:
2540 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2541 except (ValueError, TypeError), err:
2542 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2543 str(err), errors.ECODE_INVAL)
2544 if self.op.candidate_pool_size < 1:
2545 raise errors.OpPrereqError("At least one master candidate needed",
2548 _CheckBooleanOpField(self.op, "maintain_node_health")
2550 if self.op.uid_pool:
2551 uidpool.CheckUidPool(self.op.uid_pool)
2553 if self.op.add_uids:
2554 uidpool.CheckUidPool(self.op.add_uids)
2556 if self.op.remove_uids:
2557 uidpool.CheckUidPool(self.op.remove_uids)
2559 def ExpandNames(self):
2560 # FIXME: in the future maybe other cluster params won't require checking on
2561 # all nodes to be modified.
2562 self.needed_locks = {
2563 locking.LEVEL_NODE: locking.ALL_SET,
2565 self.share_locks[locking.LEVEL_NODE] = 1
2567 def BuildHooksEnv(self):
2572 "OP_TARGET": self.cfg.GetClusterName(),
2573 "NEW_VG_NAME": self.op.vg_name,
2575 mn = self.cfg.GetMasterNode()
2576 return env, [mn], [mn]
2578 def CheckPrereq(self):
2579 """Check prerequisites.
2581 This checks whether the given params don't conflict and
2582 if the given volume group is valid.
2585 if self.op.vg_name is not None and not self.op.vg_name:
2586 instances = self.cfg.GetAllInstancesInfo().values()
2587 for inst in instances:
2588 for disk in inst.disks:
2589 if _RecursiveCheckIfLVMBased(disk):
2590 raise errors.OpPrereqError("Cannot disable lvm storage while"
2591 " lvm-based instances exist",
2594 node_list = self.acquired_locks[locking.LEVEL_NODE]
2596 # if vg_name is not None, check the given volume group on all nodes
2598 vglist = self.rpc.call_vg_list(node_list)
2599 for node in node_list:
2600 msg = vglist[node].fail_msg
2602 # ignoring down node
2603 self.LogWarning("Error while gathering data on node %s"
2604 " (ignoring node): %s", node, msg)
2606 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2608 constants.MIN_VG_SIZE)
2610 raise errors.OpPrereqError("Error on node '%s': %s" %
2611 (node, vgstatus), errors.ECODE_ENVIRON)
2613 self.cluster = cluster = self.cfg.GetClusterInfo()
2614 # validate params changes
2615 if self.op.beparams:
2616 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2617 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2619 if self.op.nicparams:
2620 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2621 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2622 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2625 # check all instances for consistency
2626 for instance in self.cfg.GetAllInstancesInfo().values():
2627 for nic_idx, nic in enumerate(instance.nics):
2628 params_copy = copy.deepcopy(nic.nicparams)
2629 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2631 # check parameter syntax
2633 objects.NIC.CheckParameterSyntax(params_filled)
2634 except errors.ConfigurationError, err:
2635 nic_errors.append("Instance %s, nic/%d: %s" %
2636 (instance.name, nic_idx, err))
2638 # if we're moving instances to routed, check that they have an ip
2639 target_mode = params_filled[constants.NIC_MODE]
2640 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2641 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2642 (instance.name, nic_idx))
2644 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2645 "\n".join(nic_errors))
2647 # hypervisor list/parameters
2648 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2649 if self.op.hvparams:
2650 for hv_name, hv_dict in self.op.hvparams.items():
2651 if hv_name not in self.new_hvparams:
2652 self.new_hvparams[hv_name] = hv_dict
2654 self.new_hvparams[hv_name].update(hv_dict)
2656 # os hypervisor parameters
2657 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2659 for os_name, hvs in self.op.os_hvp.items():
2660 if os_name not in self.new_os_hvp:
2661 self.new_os_hvp[os_name] = hvs
2663 for hv_name, hv_dict in hvs.items():
2664 if hv_name not in self.new_os_hvp[os_name]:
2665 self.new_os_hvp[os_name][hv_name] = hv_dict
2667 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2670 self.new_osp = objects.FillDict(cluster.osparams, {})
2671 if self.op.osparams:
2672 for os_name, osp in self.op.osparams.items():
2673 if os_name not in self.new_osp:
2674 self.new_osp[os_name] = {}
2676 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2679 if not self.new_osp[os_name]:
2680 # we removed all parameters
2681 del self.new_osp[os_name]
2683 # check the parameter validity (remote check)
2684 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2685 os_name, self.new_osp[os_name])
2687 # changes to the hypervisor list
2688 if self.op.enabled_hypervisors is not None:
2689 self.hv_list = self.op.enabled_hypervisors
2690 for hv in self.hv_list:
2691 # if the hypervisor doesn't already exist in the cluster
2692 # hvparams, we initialize it to empty, and then (in both
2693 # cases) we make sure to fill the defaults, as we might not
2694 # have a complete defaults list if the hypervisor wasn't enabled before
2696 if hv not in new_hvp:
2698 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2699 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2701 self.hv_list = cluster.enabled_hypervisors
2703 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2704 # either the enabled list has changed, or the parameters have, validate
2705 for hv_name, hv_params in self.new_hvparams.items():
2706 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2707 (self.op.enabled_hypervisors and
2708 hv_name in self.op.enabled_hypervisors)):
2709 # either this is a new hypervisor, or its parameters have changed
2710 hv_class = hypervisor.GetHypervisor(hv_name)
2711 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2712 hv_class.CheckParameterSyntax(hv_params)
2713 _CheckHVParams(self, node_list, hv_name, hv_params)
2716 # no need to check any newly-enabled hypervisors, since the
2717 # defaults have already been checked in the above code-block
2718 for os_name, os_hvp in self.new_os_hvp.items():
2719 for hv_name, hv_params in os_hvp.items():
2720 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2721 # we need to fill in the new os_hvp on top of the actual hv_p
2722 cluster_defaults = self.new_hvparams.get(hv_name, {})
2723 new_osp = objects.FillDict(cluster_defaults, hv_params)
2724 hv_class = hypervisor.GetHypervisor(hv_name)
2725 hv_class.CheckParameterSyntax(new_osp)
2726 _CheckHVParams(self, node_list, hv_name, new_osp)
2729 def Exec(self, feedback_fn):
2730 """Change the parameters of the cluster.
2733 if self.op.vg_name is not None:
2734 new_volume = self.op.vg_name
2737 if new_volume != self.cfg.GetVGName():
2738 self.cfg.SetVGName(new_volume)
2740 feedback_fn("Cluster LVM configuration already in desired"
2741 " state, not changing")
2742 if self.op.hvparams:
2743 self.cluster.hvparams = self.new_hvparams
2745 self.cluster.os_hvp = self.new_os_hvp
2746 if self.op.enabled_hypervisors is not None:
2747 self.cluster.hvparams = self.new_hvparams
2748 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2749 if self.op.beparams:
2750 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2751 if self.op.nicparams:
2752 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2753 if self.op.osparams:
2754 self.cluster.osparams = self.new_osp
2756 if self.op.candidate_pool_size is not None:
2757 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2758 # we need to update the pool size here, otherwise the save will fail
2759 _AdjustCandidatePool(self, [])
2761 if self.op.maintain_node_health is not None:
2762 self.cluster.maintain_node_health = self.op.maintain_node_health
2764 if self.op.add_uids is not None:
2765 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2767 if self.op.remove_uids is not None:
2768 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2770 if self.op.uid_pool is not None:
2771 self.cluster.uid_pool = self.op.uid_pool
2773 self.cfg.Update(self.cluster, feedback_fn)
2776 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2777 """Distribute additional files which are part of the cluster configuration.
2779 ConfigWriter takes care of distributing the config and ssconf files, but
2780 there are more files which should be distributed to all nodes. This function
2781 makes sure those are copied.
2783 @param lu: calling logical unit
2784 @param additional_nodes: list of nodes not in the config to distribute to
2787 # 1. Gather target nodes
2788 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2789 dist_nodes = lu.cfg.GetOnlineNodeList()
2790 if additional_nodes is not None:
2791 dist_nodes.extend(additional_nodes)
2792 if myself.name in dist_nodes:
2793 dist_nodes.remove(myself.name)
2795 # 2. Gather files to distribute
2796 dist_files = set([constants.ETC_HOSTS,
2797 constants.SSH_KNOWN_HOSTS_FILE,
2798 constants.RAPI_CERT_FILE,
2799 constants.RAPI_USERS_FILE,
2800 constants.CONFD_HMAC_KEY,
2801 constants.CLUSTER_DOMAIN_SECRET_FILE,
2804 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2805 for hv_name in enabled_hypervisors:
2806 hv_class = hypervisor.GetHypervisor(hv_name)
2807 dist_files.update(hv_class.GetAncillaryFiles())
2809 # 3. Perform the files upload
2810 for fname in dist_files:
2811 if os.path.exists(fname):
2812 result = lu.rpc.call_upload_file(dist_nodes, fname)
2813 for to_node, to_result in result.items():
2814 msg = to_result.fail_msg
2816 msg = ("Copy of file %s to node %s failed: %s" %
2817 (fname, to_node, msg))
2818 lu.proc.LogWarning(msg)
2821 class LURedistributeConfig(NoHooksLU):
2822 """Force the redistribution of cluster configuration.
2824 This is a very simple LU.
2830 def ExpandNames(self):
2831 self.needed_locks = {
2832 locking.LEVEL_NODE: locking.ALL_SET,
2834 self.share_locks[locking.LEVEL_NODE] = 1
2836 def Exec(self, feedback_fn):
2837 """Redistribute the configuration.
2840 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2841 _RedistributeAncillaryFiles(self)
2844 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2845 """Sleep and poll for an instance's disk to sync.
2848 if not instance.disks or disks is not None and not disks:
2851 disks = _ExpandCheckDisks(instance, disks)
2854 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2856 node = instance.primary_node
2859 lu.cfg.SetDiskID(dev, node)
2861 # TODO: Convert to utils.Retry
2864 degr_retries = 10 # in seconds, as we sleep 1 second each time
2868 cumul_degraded = False
2869 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2870 msg = rstats.fail_msg
2872 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2875 raise errors.RemoteError("Can't contact node %s for mirror data,"
2876 " aborting." % node)
2879 rstats = rstats.payload
2881 for i, mstat in enumerate(rstats):
2883 lu.LogWarning("Can't compute data for node %s/%s",
2884 node, disks[i].iv_name)
2887 cumul_degraded = (cumul_degraded or
2888 (mstat.is_degraded and mstat.sync_percent is None))
2889 if mstat.sync_percent is not None:
2891 if mstat.estimated_time is not None:
2892 rem_time = ("%s remaining (estimated)" %
2893 utils.FormatSeconds(mstat.estimated_time))
2894 max_time = mstat.estimated_time
2896 rem_time = "no time estimate"
2897 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2898 (disks[i].iv_name, mstat.sync_percent, rem_time))
2900 # if we're done but degraded, let's do a few small retries, to
2901 # make sure we see a stable and not transient situation; therefore
2902 # we force restart of the loop
2903 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2904 logging.info("Degraded disks found, %d retries left", degr_retries)
2912 time.sleep(min(60, max_time))
2915 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2916 return not cumul_degraded
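# A hedged usage sketch (not taken verbatim from this module): an LU that
# just created mirrored disks could wait for them with
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks for %s failed to sync" % instance.name)
# i.e. a False return means at least one mirror stayed degraded.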
2919 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2920 """Check that mirrors are not degraded.
2922 The ldisk parameter, if True, will change the test from the
2923 is_degraded attribute (which represents overall non-ok status for
2924 the device(s)) to the ldisk (representing the local storage status).
2927 lu.cfg.SetDiskID(dev, node)
2931 if on_primary or dev.AssembleOnSecondary():
2932 rstats = lu.rpc.call_blockdev_find(node, dev)
2933 msg = rstats.fail_msg
2935 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2937 elif not rstats.payload:
2938 lu.LogWarning("Can't find disk on node %s", node)
2942 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2944 result = result and not rstats.payload.is_degraded
2947 for child in dev.children:
2948 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2953 class LUDiagnoseOS(NoHooksLU):
2954 """Logical unit for OS diagnose/query.
2958 ("output_fields", _TListOf(_TNonEmptyString)),
2959 ("names", _TListOf(_TNonEmptyString)),
2962 _FIELDS_STATIC = utils.FieldSet()
2963 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2964 "parameters", "api_versions")
2966 def CheckArguments(self):
2968 raise errors.OpPrereqError("Selective OS query not supported",
2971 _CheckOutputFields(static=self._FIELDS_STATIC,
2972 dynamic=self._FIELDS_DYNAMIC,
2973 selected=self.op.output_fields)
2975 def ExpandNames(self):
2976 # Lock all nodes, in shared mode
2977 # Temporary removal of locks, should be reverted later
2978 # TODO: reintroduce locks when they are lighter-weight
2979 self.needed_locks = {}
2980 #self.share_locks[locking.LEVEL_NODE] = 1
2981 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2984 def _DiagnoseByOS(rlist):
2985 """Remaps a per-node return list into an a per-os per-node dictionary
2987 @param rlist: a map with node names as keys and OS objects as values
2990 @return: a dictionary with osnames as keys and as value another
2991 map, with nodes as keys and tuples of (path, status, diagnose,
2992 variants, parameters, api_versions) as values, eg::
2994 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2995 (/srv/..., False, "invalid api")],
2996 "node2": [(/srv/..., True, "", [], [])]}
3001 # we build here the list of nodes that didn't fail the RPC (at RPC
3002 # level), so that nodes with a non-responding node daemon don't
3003 # make all OSes invalid
3004 good_nodes = [node_name for node_name in rlist
3005 if not rlist[node_name].fail_msg]
3006 for node_name, nr in rlist.items():
3007 if nr.fail_msg or not nr.payload:
3009 for (name, path, status, diagnose, variants,
3010 params, api_versions) in nr.payload:
3011 if name not in all_os:
3012 # build a list of nodes for this os containing empty lists
3013 # for each node in node_list
3015 for nname in good_nodes:
3016 all_os[name][nname] = []
3017 # convert params from [name, help] to (name, help)
3018 params = [tuple(v) for v in params]
3019 all_os[name][node_name].append((path, status, diagnose,
3020 variants, params, api_versions))
3023 def Exec(self, feedback_fn):
3024 """Compute the list of OSes.
3027 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3028 node_data = self.rpc.call_os_diagnose(valid_nodes)
3029 pol = self._DiagnoseByOS(node_data)
3032 for os_name, os_data in pol.items():
3035 (variants, params, api_versions) = null_state = (set(), set(), set())
3036 for idx, osl in enumerate(os_data.values()):
3037 valid = bool(valid and osl and osl[0][1])
3039 (variants, params, api_versions) = null_state
3041 node_variants, node_params, node_api = osl[0][3:6]
3042 if idx == 0: # first entry
3043 variants = set(node_variants)
3044 params = set(node_params)
3045 api_versions = set(node_api)
3046 else: # keep consistency
3047 variants.intersection_update(node_variants)
3048 params.intersection_update(node_params)
3049 api_versions.intersection_update(node_api)
3051 for field in self.op.output_fields:
3054 elif field == "valid":
3056 elif field == "node_status":
3057 # this is just a copy of the dict
3059 for node_name, nos_list in os_data.items():
3060 val[node_name] = nos_list
3061 elif field == "variants":
3062 val = list(variants)
3063 elif field == "parameters":
3065 elif field == "api_versions":
3066 val = list(api_versions)
3068 raise errors.ParameterError(field)
3075 class LURemoveNode(LogicalUnit):
3076 """Logical unit for removing a node.
3079 HPATH = "node-remove"
3080 HTYPE = constants.HTYPE_NODE
3081 _OP_REQP = [("node_name", _TNonEmptyString)]
3083 def BuildHooksEnv(self):
3086 This doesn't run on the target node in the pre phase as a failed
3087 node would then be impossible to remove.
3091 "OP_TARGET": self.op.node_name,
3092 "NODE_NAME": self.op.node_name,
3094 all_nodes = self.cfg.GetNodeList()
3096 all_nodes.remove(self.op.node_name)
3098 logging.warning("Node %s which is about to be removed not found"
3099 " in the all nodes list", self.op.node_name)
3100 return env, all_nodes, all_nodes
3102 def CheckPrereq(self):
3103 """Check prerequisites.
3106 - the node exists in the configuration
3107 - it does not have primary or secondary instances
3108 - it's not the master
3110 Any errors are signaled by raising errors.OpPrereqError.
3113 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3114 node = self.cfg.GetNodeInfo(self.op.node_name)
3115 assert node is not None
3117 instance_list = self.cfg.GetInstanceList()
3119 masternode = self.cfg.GetMasterNode()
3120 if node.name == masternode:
3121 raise errors.OpPrereqError("Node is the master node,"
3122 " you need to failover first.",
3125 for instance_name in instance_list:
3126 instance = self.cfg.GetInstanceInfo(instance_name)
3127 if node.name in instance.all_nodes:
3128 raise errors.OpPrereqError("Instance %s is still running on the node,"
3129 " please remove first." % instance_name,
3131 self.op.node_name = node.name
3134 def Exec(self, feedback_fn):
3135 """Removes the node from the cluster.
3139 logging.info("Stopping the node daemon and removing configs from node %s",
3142 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3144 # Promote nodes to master candidate as needed
3145 _AdjustCandidatePool(self, exceptions=[node.name])
3146 self.context.RemoveNode(node.name)
3148 # Run post hooks on the node before it's removed
3149 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3151 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3153 # pylint: disable-msg=W0702
3154 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3156 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3157 msg = result.fail_msg
3159 self.LogWarning("Errors encountered on the remote node while leaving"
3160 " the cluster: %s", msg)
3162 # Remove node from our /etc/hosts
3163 if self.cfg.GetClusterInfo().modify_etc_hosts:
3164 # FIXME: this should be done via an rpc call to node daemon
3165 utils.RemoveHostFromEtcHosts(node.name)
3166 _RedistributeAncillaryFiles(self)
3169 class LUQueryNodes(NoHooksLU):
3170 """Logical unit for querying nodes.
3173 # pylint: disable-msg=W0142
3175 ("output_fields", _TListOf(_TNonEmptyString)),
3176 ("names", _TListOf(_TNonEmptyString)),
3177 ("use_locking", _TBool),
3181 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3182 "master_candidate", "offline", "drained"]
3184 _FIELDS_DYNAMIC = utils.FieldSet(
3186 "mtotal", "mnode", "mfree",
3188 "ctotal", "cnodes", "csockets",
3191 _FIELDS_STATIC = utils.FieldSet(*[
3192 "pinst_cnt", "sinst_cnt",
3193 "pinst_list", "sinst_list",
3194 "pip", "sip", "tags",
3196 "role"] + _SIMPLE_FIELDS
3199 def CheckArguments(self):
3200 _CheckOutputFields(static=self._FIELDS_STATIC,
3201 dynamic=self._FIELDS_DYNAMIC,
3202 selected=self.op.output_fields)
3204 def ExpandNames(self):
3205 self.needed_locks = {}
3206 self.share_locks[locking.LEVEL_NODE] = 1
3209 self.wanted = _GetWantedNodes(self, self.op.names)
3211 self.wanted = locking.ALL_SET
3213 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3214 self.do_locking = self.do_node_query and self.op.use_locking
3216 # if we don't request only static fields, we need to lock the nodes
3217 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3219 def Exec(self, feedback_fn):
3220 """Computes the list of nodes and their attributes.
3223 all_info = self.cfg.GetAllNodesInfo()
3225 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3226 elif self.wanted != locking.ALL_SET:
3227 nodenames = self.wanted
3228 missing = set(nodenames).difference(all_info.keys())
3230 raise errors.OpExecError(
3231 "Some nodes were removed before retrieving their data: %s" % missing)
3233 nodenames = all_info.keys()
3235 nodenames = utils.NiceSort(nodenames)
3236 nodelist = [all_info[name] for name in nodenames]
3238 # begin data gathering
3240 if self.do_node_query:
3242 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3243 self.cfg.GetHypervisorType())
3244 for name in nodenames:
3245 nodeinfo = node_data[name]
3246 if not nodeinfo.fail_msg and nodeinfo.payload:
3247 nodeinfo = nodeinfo.payload
3248 fn = utils.TryConvert
3250 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3251 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3252 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3253 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3254 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3255 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3256 "bootid": nodeinfo.get('bootid', None),
3257 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3258 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3261 live_data[name] = {}
3263 live_data = dict.fromkeys(nodenames, {})
3265 node_to_primary = dict([(name, set()) for name in nodenames])
3266 node_to_secondary = dict([(name, set()) for name in nodenames])
3268 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3269 "sinst_cnt", "sinst_list"))
3270 if inst_fields & frozenset(self.op.output_fields):
3271 inst_data = self.cfg.GetAllInstancesInfo()
3273 for inst in inst_data.values():
3274 if inst.primary_node in node_to_primary:
3275 node_to_primary[inst.primary_node].add(inst.name)
3276 for secnode in inst.secondary_nodes:
3277 if secnode in node_to_secondary:
3278 node_to_secondary[secnode].add(inst.name)
3280 master_node = self.cfg.GetMasterNode()
3282 # end data gathering
3285 for node in nodelist:
3287 for field in self.op.output_fields:
3288 if field in self._SIMPLE_FIELDS:
3289 val = getattr(node, field)
3290 elif field == "pinst_list":
3291 val = list(node_to_primary[node.name])
3292 elif field == "sinst_list":
3293 val = list(node_to_secondary[node.name])
3294 elif field == "pinst_cnt":
3295 val = len(node_to_primary[node.name])
3296 elif field == "sinst_cnt":
3297 val = len(node_to_secondary[node.name])
3298 elif field == "pip":
3299 val = node.primary_ip
3300 elif field == "sip":
3301 val = node.secondary_ip
3302 elif field == "tags":
3303 val = list(node.GetTags())
3304 elif field == "master":
3305 val = node.name == master_node
3306 elif self._FIELDS_DYNAMIC.Matches(field):
3307 val = live_data[node.name].get(field, None)
3308 elif field == "role":
3309 if node.name == master_node:
3311 elif node.master_candidate:
3320 raise errors.ParameterError(field)
3321 node_output.append(val)
3322 output.append(node_output)
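# Example of the rows collected above (node names and values are made up):
# with output_fields == ["name", "pinst_cnt", "pip"] a row could be
# ["node1.example.com", 2, "192.0.2.10"], one list per queried node.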
3327 class LUQueryNodeVolumes(NoHooksLU):
3328 """Logical unit for getting volumes on node(s).
3332 ("nodes", _TListOf(_TNonEmptyString)),
3333 ("output_fields", _TListOf(_TNonEmptyString)),
3336 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3337 _FIELDS_STATIC = utils.FieldSet("node")
3339 def CheckArguments(self):
3340 _CheckOutputFields(static=self._FIELDS_STATIC,
3341 dynamic=self._FIELDS_DYNAMIC,
3342 selected=self.op.output_fields)
3344 def ExpandNames(self):
3345 self.needed_locks = {}
3346 self.share_locks[locking.LEVEL_NODE] = 1
3347 if not self.op.nodes:
3348 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3350 self.needed_locks[locking.LEVEL_NODE] = \
3351 _GetWantedNodes(self, self.op.nodes)
3353 def Exec(self, feedback_fn):
3354 """Computes the list of nodes and their attributes.
3357 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3358 volumes = self.rpc.call_node_volumes(nodenames)
3360 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3361 in self.cfg.GetInstanceList()]
3363 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3366 for node in nodenames:
3367 nresult = volumes[node]
3370 msg = nresult.fail_msg
3372 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3375 node_vols = nresult.payload[:]
3376 node_vols.sort(key=lambda vol: vol['dev'])
3378 for vol in node_vols:
3380 for field in self.op.output_fields:
3383 elif field == "phys":
3387 elif field == "name":
3389 elif field == "size":
3390 val = int(float(vol['size']))
3391 elif field == "instance":
3393 if node not in lv_by_node[inst]:
3395 if vol['name'] in lv_by_node[inst][node]:
3401 raise errors.ParameterError(field)
3402 node_output.append(str(val))
3404 output.append(node_output)
3409 class LUQueryNodeStorage(NoHooksLU):
3410 """Logical unit for getting information on storage units on node(s).
3413 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3415 ("nodes", _TListOf(_TNonEmptyString)),
3416 ("storage_type", _CheckStorageType),
3417 ("output_fields", _TListOf(_TNonEmptyString)),
3419 _OP_DEFS = [("name", None)]
3422 def CheckArguments(self):
3423 _CheckOutputFields(static=self._FIELDS_STATIC,
3424 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3425 selected=self.op.output_fields)
3427 def ExpandNames(self):
3428 self.needed_locks = {}
3429 self.share_locks[locking.LEVEL_NODE] = 1
3432 self.needed_locks[locking.LEVEL_NODE] = \
3433 _GetWantedNodes(self, self.op.nodes)
3435 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3437 def Exec(self, feedback_fn):
3438 """Computes the list of nodes and their attributes.
3441 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3443 # Always get name to sort by
3444 if constants.SF_NAME in self.op.output_fields:
3445 fields = self.op.output_fields[:]
3447 fields = [constants.SF_NAME] + self.op.output_fields
3449 # Never ask for node or type as it's only known to the LU
3450 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3451 while extra in fields:
3452 fields.remove(extra)
3454 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3455 name_idx = field_idx[constants.SF_NAME]
3457 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3458 data = self.rpc.call_storage_list(self.nodes,
3459 self.op.storage_type, st_args,
3460 self.op.name, fields)
3464 for node in utils.NiceSort(self.nodes):
3465 nresult = data[node]
3469 msg = nresult.fail_msg
3471 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3474 rows = dict([(row[name_idx], row) for row in nresult.payload])
3476 for name in utils.NiceSort(rows.keys()):
3481 for field in self.op.output_fields:
3482 if field == constants.SF_NODE:
3484 elif field == constants.SF_TYPE:
3485 val = self.op.storage_type
3486 elif field in field_idx:
3487 val = row[field_idx[field]]
3489 raise errors.ParameterError(field)
3498 class LUModifyNodeStorage(NoHooksLU):
3499 """Logical unit for modifying a storage volume on a node.
3503 ("node_name", _TNonEmptyString),
3504 ("storage_type", _CheckStorageType),
3505 ("name", _TNonEmptyString),
3506 ("changes", _TDict),
3510 def CheckArguments(self):
3511 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3513 storage_type = self.op.storage_type
3516 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3518 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3519 " modified" % storage_type,
3522 diff = set(self.op.changes.keys()) - modifiable
3524 raise errors.OpPrereqError("The following fields can not be modified for"
3525 " storage units of type '%s': %r" %
3526 (storage_type, list(diff)),
3529 def ExpandNames(self):
3530 self.needed_locks = {
3531 locking.LEVEL_NODE: self.op.node_name,
3534 def Exec(self, feedback_fn):
3535 """Computes the list of nodes and their attributes.
3538 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3539 result = self.rpc.call_storage_modify(self.op.node_name,
3540 self.op.storage_type, st_args,
3541 self.op.name, self.op.changes)
3542 result.Raise("Failed to modify storage unit '%s' on %s" %
3543 (self.op.name, self.op.node_name))
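    # Hedged example of an invocation this LU could serve: marking an LVM
    # physical volume as non-allocatable might pass something like
    # storage_type=constants.ST_LVM_PV, name="/dev/sdb1" and
    # changes={constants.SF_ALLOCATABLE: False}; the exact names are only a
    # sketch, and the allowed fields come from MODIFIABLE_STORAGE_FIELDS.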
3546 class LUAddNode(LogicalUnit):
3547 """Logical unit for adding node to the cluster.
3551 HTYPE = constants.HTYPE_NODE
3553 ("node_name", _TNonEmptyString),
3555 _OP_DEFS = [("secondary_ip", None)]
3557 def CheckArguments(self):
3558 # validate/normalize the node name
3559 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3561 def BuildHooksEnv(self):
3564 This will run on all nodes before, and on all nodes + the new node after.
3568 "OP_TARGET": self.op.node_name,
3569 "NODE_NAME": self.op.node_name,
3570 "NODE_PIP": self.op.primary_ip,
3571 "NODE_SIP": self.op.secondary_ip,
3573 nodes_0 = self.cfg.GetNodeList()
3574 nodes_1 = nodes_0 + [self.op.node_name, ]
3575 return env, nodes_0, nodes_1
3577 def CheckPrereq(self):
3578 """Check prerequisites.
3581 - the new node is not already in the config
3583 - its parameters (single/dual homed) matches the cluster
3585 Any errors are signaled by raising errors.OpPrereqError.
3588 node_name = self.op.node_name
3591 dns_data = utils.GetHostInfo(node_name)
3593 node = dns_data.name
3594 primary_ip = self.op.primary_ip = dns_data.ip
3595 if self.op.secondary_ip is None:
3596 self.op.secondary_ip = primary_ip
3597 if not utils.IsValidIP4(self.op.secondary_ip):
3598 raise errors.OpPrereqError("Invalid secondary IP given",
3600 secondary_ip = self.op.secondary_ip
3602 node_list = cfg.GetNodeList()
3603 if not self.op.readd and node in node_list:
3604 raise errors.OpPrereqError("Node %s is already in the configuration" %
3605 node, errors.ECODE_EXISTS)
3606 elif self.op.readd and node not in node_list:
3607 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3610 self.changed_primary_ip = False
3612 for existing_node_name in node_list:
3613 existing_node = cfg.GetNodeInfo(existing_node_name)
3615 if self.op.readd and node == existing_node_name:
3616 if existing_node.secondary_ip != secondary_ip:
3617 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3618 " address configuration as before",
3620 if existing_node.primary_ip != primary_ip:
3621 self.changed_primary_ip = True
3625 if (existing_node.primary_ip == primary_ip or
3626 existing_node.secondary_ip == primary_ip or
3627 existing_node.primary_ip == secondary_ip or
3628 existing_node.secondary_ip == secondary_ip):
3629 raise errors.OpPrereqError("New node ip address(es) conflict with"
3630 " existing node %s" % existing_node.name,
3631 errors.ECODE_NOTUNIQUE)
3633 # check that the type of the node (single versus dual homed) is the
3634 # same as for the master
3635 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3636 master_singlehomed = myself.secondary_ip == myself.primary_ip
3637 newbie_singlehomed = secondary_ip == primary_ip
3638 if master_singlehomed != newbie_singlehomed:
3639 if master_singlehomed:
3640 raise errors.OpPrereqError("The master has no private ip but the"
3641 " new node has one",
3644 raise errors.OpPrereqError("The master has a private ip but the"
3645 " new node doesn't have one",
3648 # checks reachability
3649 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3650 raise errors.OpPrereqError("Node not reachable by ping",
3651 errors.ECODE_ENVIRON)
3653 if not newbie_singlehomed:
3654 # check reachability from my secondary ip to newbie's secondary ip
3655 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3656 source=myself.secondary_ip):
3657 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3658 " based ping to noded port",
3659 errors.ECODE_ENVIRON)
3666 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3669 self.new_node = self.cfg.GetNodeInfo(node)
3670 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3672 self.new_node = objects.Node(name=node,
3673 primary_ip=primary_ip,
3674 secondary_ip=secondary_ip,
3675 master_candidate=self.master_candidate,
3676 offline=False, drained=False)
3678 def Exec(self, feedback_fn):
3679 """Adds the new node to the cluster.
3682 new_node = self.new_node
3683 node = new_node.name
3685 # for re-adds, reset the offline/drained/master-candidate flags;
3686 # we need to reset here, otherwise offline would prevent RPC calls
3687 # later in the procedure; this also means that if the re-add
3688 # fails, we are left with a non-offlined, broken node
3690 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3691 self.LogInfo("Readding a node, the offline/drained flags were reset")
3692 # if we demote the node, we do cleanup later in the procedure
3693 new_node.master_candidate = self.master_candidate
3694 if self.changed_primary_ip:
3695 new_node.primary_ip = self.op.primary_ip
3697 # notify the user about any possible mc promotion
3698 if new_node.master_candidate:
3699 self.LogInfo("Node will be a master candidate")
3701 # check connectivity
3702 result = self.rpc.call_version([node])[node]
3703 result.Raise("Can't get version information from node %s" % node)
3704 if constants.PROTOCOL_VERSION == result.payload:
3705 logging.info("Communication to node %s fine, sw version %s match",
3706 node, result.payload)
3708 raise errors.OpExecError("Version mismatch master version %s,"
3709 " node version %s" %
3710 (constants.PROTOCOL_VERSION, result.payload))
3713 if self.cfg.GetClusterInfo().modify_ssh_setup:
3714 logging.info("Copy ssh key to node %s", node)
3715 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3717 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3718 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3722 keyarray.append(utils.ReadFile(i))
3724 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3725 keyarray[2], keyarray[3], keyarray[4],
3727 result.Raise("Cannot transfer ssh keys to the new node")
3729 # Add node to our /etc/hosts, and add key to known_hosts
3730 if self.cfg.GetClusterInfo().modify_etc_hosts:
3731 # FIXME: this should be done via an rpc call to node daemon
3732 utils.AddHostToEtcHosts(new_node.name)
3734 if new_node.secondary_ip != new_node.primary_ip:
3735 result = self.rpc.call_node_has_ip_address(new_node.name,
3736 new_node.secondary_ip)
3737 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3738 prereq=True, ecode=errors.ECODE_ENVIRON)
3739 if not result.payload:
3740 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3741 " you gave (%s). Please fix and re-run this"
3742 " command." % new_node.secondary_ip)
3744 node_verify_list = [self.cfg.GetMasterNode()]
3745 node_verify_param = {
3746 constants.NV_NODELIST: [node],
3747 # TODO: do a node-net-test as well?
3750 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3751 self.cfg.GetClusterName())
3752 for verifier in node_verify_list:
3753 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3754 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3756 for failed in nl_payload:
3757 feedback_fn("ssh/hostname verification failed"
3758 " (checking from %s): %s" %
3759 (verifier, nl_payload[failed]))
3760 raise errors.OpExecError("ssh/hostname verification failed.")
3763 _RedistributeAncillaryFiles(self)
3764 self.context.ReaddNode(new_node)
3765 # make sure we redistribute the config
3766 self.cfg.Update(new_node, feedback_fn)
3767 # and make sure the new node will not have old files around
3768 if not new_node.master_candidate:
3769 result = self.rpc.call_node_demote_from_mc(new_node.name)
3770 msg = result.fail_msg
3772 self.LogWarning("Node failed to demote itself from master"
3773 " candidate status: %s" % msg)
3775 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3776 self.context.AddNode(new_node, self.proc.GetECId())
3779 class LUSetNodeParams(LogicalUnit):
3780 """Modifies the parameters of a node.
3783 HPATH = "node-modify"
3784 HTYPE = constants.HTYPE_NODE
3785 _OP_REQP = [("node_name", _TNonEmptyString)]
3788 def CheckArguments(self):
3789 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3790 _CheckBooleanOpField(self.op, 'master_candidate')
3791 _CheckBooleanOpField(self.op, 'offline')
3792 _CheckBooleanOpField(self.op, 'drained')
3793 _CheckBooleanOpField(self.op, 'auto_promote')
3794 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3795 if all_mods.count(None) == 3:
3796 raise errors.OpPrereqError("Please pass at least one modification",
3798 if all_mods.count(True) > 1:
3799 raise errors.OpPrereqError("Can't set the node into more than one"
3800 " state at the same time",
3803 # Boolean value that tells us whether we're offlining or draining the node
3804 self.offline_or_drain = (self.op.offline == True or
3805 self.op.drained == True)
3806 self.deoffline_or_drain = (self.op.offline == False or
3807 self.op.drained == False)
3808 self.might_demote = (self.op.master_candidate == False or
3809 self.offline_or_drain)
3811 self.lock_all = self.op.auto_promote and self.might_demote
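    # Worked example of the flag logic above (a hypothetical call): passing
    # offline=True with master_candidate and drained left unset gives
    # offline_or_drain=True and might_demote=True, so lock_all becomes True
    # only if auto_promote was also requested.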
3814 def ExpandNames(self):
3816 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3818 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3820 def BuildHooksEnv(self):
3823 This runs on the master node.
3827 "OP_TARGET": self.op.node_name,
3828 "MASTER_CANDIDATE": str(self.op.master_candidate),
3829 "OFFLINE": str(self.op.offline),
3830 "DRAINED": str(self.op.drained),
3832 nl = [self.cfg.GetMasterNode(),
3836 def CheckPrereq(self):
3837 """Check prerequisites.
3839 This only checks the instance list against the existing names.
3842 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3844 if (self.op.master_candidate is not None or
3845 self.op.drained is not None or
3846 self.op.offline is not None):
3847 # we can't change the master's node flags
3848 if self.op.node_name == self.cfg.GetMasterNode():
3849 raise errors.OpPrereqError("The master role can be changed"
3850 " only via masterfailover",
3854 if node.master_candidate and self.might_demote and not self.lock_all:
3855 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3856 # check if, after removing the current node, we're missing master candidates
3858 (mc_remaining, mc_should, _) = \
3859 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3860 if mc_remaining < mc_should:
3861 raise errors.OpPrereqError("Not enough master candidates, please"
3862 " pass auto_promote to allow promotion",
3865 if (self.op.master_candidate == True and
3866 ((node.offline and not self.op.offline == False) or
3867 (node.drained and not self.op.drained == False))):
3868 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3869 " to master_candidate" % node.name,
3872 # If we're being deofflined/drained, we'll MC ourself if needed
3873 if (self.deoffline_or_drain and not self.offline_or_drain and not
3874 self.op.master_candidate == True and not node.master_candidate):
3875 self.op.master_candidate = _DecideSelfPromotion(self)
3876 if self.op.master_candidate:
3877 self.LogInfo("Autopromoting node to master candidate")
3881 def Exec(self, feedback_fn):
3890 if self.op.offline is not None:
3891 node.offline = self.op.offline
3892 result.append(("offline", str(self.op.offline)))
3893 if self.op.offline == True:
3894 if node.master_candidate:
3895 node.master_candidate = False
3897 result.append(("master_candidate", "auto-demotion due to offline"))
3899 node.drained = False
3900 result.append(("drained", "clear drained status due to offline"))
3902 if self.op.master_candidate is not None:
3903 node.master_candidate = self.op.master_candidate
3905 result.append(("master_candidate", str(self.op.master_candidate)))
3906 if self.op.master_candidate == False:
3907 rrc = self.rpc.call_node_demote_from_mc(node.name)
3910 self.LogWarning("Node failed to demote itself: %s" % msg)
3912 if self.op.drained is not None:
3913 node.drained = self.op.drained
3914 result.append(("drained", str(self.op.drained)))
3915 if self.op.drained == True:
3916 if node.master_candidate:
3917 node.master_candidate = False
3919 result.append(("master_candidate", "auto-demotion due to drain"))
3920 rrc = self.rpc.call_node_demote_from_mc(node.name)
3923 self.LogWarning("Node failed to demote itself: %s" % msg)
3925 node.offline = False
3926 result.append(("offline", "clear offline status due to drain"))
3928 # we locked all nodes, so we adjust the candidate pool before updating this node
3930 _AdjustCandidatePool(self, [node.name])
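# (the adjustment may auto-promote other nodes to make up for the
# demotion performed on this one)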
3932 # this will trigger configuration file update, if needed
3933 self.cfg.Update(node, feedback_fn)
3935 # this will trigger job queue propagation or cleanup
3937 self.context.ReaddNode(node)
3942 class LUPowercycleNode(NoHooksLU):
3943 """Powercycles a node.
3947 ("node_name", _TNonEmptyString),
3952 def CheckArguments(self):
3953 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3954 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3955 raise errors.OpPrereqError("The node is the master and the force"
3956 " parameter was not set",
3959 def ExpandNames(self):
3960 """Locking for PowercycleNode.
3962 This is a last-resort option and shouldn't block on other
3963 jobs. Therefore, we grab no locks.
3966 self.needed_locks = {}
3968 def Exec(self, feedback_fn):
3972 result = self.rpc.call_node_powercycle(self.op.node_name,
3973 self.cfg.GetHypervisorType())
3974 result.Raise("Failed to schedule the reboot")
3975 return result.payload
3978 class LUQueryClusterInfo(NoHooksLU):
3979 """Query cluster configuration.
3985 def ExpandNames(self):
3986 self.needed_locks = {}
3988 def Exec(self, feedback_fn):
3989 """Return cluster config.
3992 cluster = self.cfg.GetClusterInfo()
3995 # Filter just for enabled hypervisors
3996 for os_name, hv_dict in cluster.os_hvp.items():
3997 os_hvp[os_name] = {}
3998 for hv_name, hv_params in hv_dict.items():
3999 if hv_name in cluster.enabled_hypervisors:
4000 os_hvp[os_name][hv_name] = hv_params
4003 "software_version": constants.RELEASE_VERSION,
4004 "protocol_version": constants.PROTOCOL_VERSION,
4005 "config_version": constants.CONFIG_VERSION,
4006 "os_api_version": max(constants.OS_API_VERSIONS),
4007 "export_version": constants.EXPORT_VERSION,
4008 "architecture": (platform.architecture()[0], platform.machine()),
4009 "name": cluster.cluster_name,
4010 "master": cluster.master_node,
4011 "default_hypervisor": cluster.enabled_hypervisors[0],
4012 "enabled_hypervisors": cluster.enabled_hypervisors,
4013 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4014 for hypervisor_name in cluster.enabled_hypervisors]),
4016 "beparams": cluster.beparams,
4017 "osparams": cluster.osparams,
4018 "nicparams": cluster.nicparams,
4019 "candidate_pool_size": cluster.candidate_pool_size,
4020 "master_netdev": cluster.master_netdev,
4021 "volume_group_name": cluster.volume_group_name,
4022 "file_storage_dir": cluster.file_storage_dir,
4023 "maintain_node_health": cluster.maintain_node_health,
4024 "ctime": cluster.ctime,
4025 "mtime": cluster.mtime,
4026 "uuid": cluster.uuid,
4027 "tags": list(cluster.GetTags()),
4028 "uid_pool": cluster.uid_pool,
4034 class LUQueryConfigValues(NoHooksLU):
4035 """Return configuration values.
4040 _FIELDS_DYNAMIC = utils.FieldSet()
4041 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4044 def CheckArguments(self):
4045 _CheckOutputFields(static=self._FIELDS_STATIC,
4046 dynamic=self._FIELDS_DYNAMIC,
4047 selected=self.op.output_fields)
4049 def ExpandNames(self):
4050 self.needed_locks = {}
4052 def Exec(self, feedback_fn):
4053 """Dump a representation of the cluster config to the standard output.
4057 for field in self.op.output_fields:
4058 if field == "cluster_name":
4059 entry = self.cfg.GetClusterName()
4060 elif field == "master_node":
4061 entry = self.cfg.GetMasterNode()
4062 elif field == "drain_flag":
4063 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
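# (the drain flag is simply the presence of a marker file on the master)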
4064 elif field == "watcher_pause":
4065 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4067 raise errors.ParameterError(field)
4068 values.append(entry)
4072 class LUActivateInstanceDisks(NoHooksLU):
4073 """Bring up an instance's disks.
4076 _OP_REQP = [("instance_name", _TNonEmptyString)]
4077 _OP_DEFS = [("ignore_size", False)]
4080 def ExpandNames(self):
4081 self._ExpandAndLockInstance()
4082 self.needed_locks[locking.LEVEL_NODE] = []
4083 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4085 def DeclareLocks(self, level):
4086 if level == locking.LEVEL_NODE:
4087 self._LockInstancesNodes()
4089 def CheckPrereq(self):
4090 """Check prerequisites.
4092 This checks that the instance is in the cluster.
4095 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4096 assert self.instance is not None, \
4097 "Cannot retrieve locked instance %s" % self.op.instance_name
4098 _CheckNodeOnline(self, self.instance.primary_node)
4100 def Exec(self, feedback_fn):
4101 """Activate the disks.
4104 disks_ok, disks_info = \
4105 _AssembleInstanceDisks(self, self.instance,
4106 ignore_size=self.op.ignore_size)
4108 raise errors.OpExecError("Cannot activate block devices")
4113 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4115 """Prepare the block devices for an instance.
4117 This sets up the block devices on all nodes.
4119 @type lu: L{LogicalUnit}
4120 @param lu: the logical unit on whose behalf we execute
4121 @type instance: L{objects.Instance}
4122 @param instance: the instance for whose disks we assemble
4123 @type disks: list of L{objects.Disk} or None
4124 @param disks: which disks to assemble (or all, if None)
4125 @type ignore_secondaries: boolean
4126 @param ignore_secondaries: if true, errors on secondary nodes
4127 won't result in an error return from the function
4128 @type ignore_size: boolean
4129 @param ignore_size: if true, the current known size of the disk
4130 will not be used during the disk activation, useful for cases
4131 when the size is wrong
4132 @return: a pair (status, device info); status is False if the operation
4133 failed, and device info is a list of (host, instance_visible_name,
4134 node_visible_name) tuples with the mapping from node devices to instance devices
4139 iname = instance.name
4140 disks = _ExpandCheckDisks(instance, disks)
4142 # With the two-pass mechanism we try to reduce the window of
4143 # opportunity for the race condition of switching DRBD to primary
4144 # before handshaking occurred, but we do not eliminate it
4146 # The proper fix would be to wait (with some limits) until the
4147 # connection has been made and drbd transitions from WFConnection
4148 # into any other network-connected state (Connected, SyncTarget,
4151 # 1st pass, assemble on all nodes in secondary mode
4152 for inst_disk in disks:
4153 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4155 node_disk = node_disk.Copy()
4156 node_disk.UnsetSize()
4157 lu.cfg.SetDiskID(node_disk, node)
4158 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4159 msg = result.fail_msg
4161 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4162 " (is_primary=False, pass=1): %s",
4163 inst_disk.iv_name, node, msg)
4164 if not ignore_secondaries:
4167 # FIXME: race condition on drbd migration to primary
4169 # 2nd pass, do only the primary node
4170 for inst_disk in disks:
4173 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4174 if node != instance.primary_node:
4177 node_disk = node_disk.Copy()
4178 node_disk.UnsetSize()
4179 lu.cfg.SetDiskID(node_disk, node)
4180 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4181 msg = result.fail_msg
4183 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4184 " (is_primary=True, pass=2): %s",
4185 inst_disk.iv_name, node, msg)
4188 dev_path = result.payload
4190 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4192 # leave the disks configured for the primary node
4193 # this is a workaround that would be fixed better by
4194 # improving the logical/physical id handling
4196 lu.cfg.SetDiskID(disk, instance.primary_node)
4198 return disks_ok, device_info
4201 def _StartInstanceDisks(lu, instance, force):
4202 """Start the disks of an instance.
4205 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4206 ignore_secondaries=force)
4208 _ShutdownInstanceDisks(lu, instance)
4209 if force is not None and not force:
4210 lu.proc.LogWarning("", hint="If the message above refers to a secondary node,"
4212 " you can retry the operation using '--force'.")
4213 raise errors.OpExecError("Disk consistency error")
4216 class LUDeactivateInstanceDisks(NoHooksLU):
4217 """Shutdown an instance's disks.
4220 _OP_REQP = [("instance_name", _TNonEmptyString)]
4223 def ExpandNames(self):
4224 self._ExpandAndLockInstance()
4225 self.needed_locks[locking.LEVEL_NODE] = []
4226 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4228 def DeclareLocks(self, level):
4229 if level == locking.LEVEL_NODE:
4230 self._LockInstancesNodes()
4232 def CheckPrereq(self):
4233 """Check prerequisites.
4235 This checks that the instance is in the cluster.
4238 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4239 assert self.instance is not None, \
4240 "Cannot retrieve locked instance %s" % self.op.instance_name
4242 def Exec(self, feedback_fn):
4243 """Deactivate the disks
4246 instance = self.instance
4247 _SafeShutdownInstanceDisks(self, instance)
4250 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4251 """Shutdown block devices of an instance.
4253 This function checks if an instance is running, before calling
4254 _ShutdownInstanceDisks.
4257 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4258 _ShutdownInstanceDisks(lu, instance, disks=disks)
4261 def _ExpandCheckDisks(instance, disks):
4262 """Return the instance disks selected by the disks list
4264 @type disks: list of L{objects.Disk} or None
4265 @param disks: selected disks
4266 @rtype: list of L{objects.Disk}
4267 @return: selected instance disks to act on
4271 return instance.disks
4273 if not set(disks).issubset(instance.disks):
4274 raise errors.ProgrammerError("Can only act on disks belonging to the"
4279 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4280 """Shutdown block devices of an instance.
4282 This does the shutdown on all nodes of the instance.
4284 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed; otherwise they are only logged.
4289 disks = _ExpandCheckDisks(instance, disks)
4292 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4293 lu.cfg.SetDiskID(top_disk, node)
4294 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4295 msg = result.fail_msg
4297 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4298 disk.iv_name, node, msg)
4299 if not ignore_primary or node != instance.primary_node:
4304 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4305 """Checks if a node has enough free memory.
4307 This function checks if a given node has the needed amount of free
4308 memory. In case the node has less memory or we cannot get the
4309 information from the node, this function raises an OpPrereqError
4312 @type lu: C{LogicalUnit}
4313 @param lu: a logical unit from which we get configuration data
4315 @param node: the node to check
4316 @type reason: C{str}
4317 @param reason: string to use in the error message
4318 @type requested: C{int}
4319 @param requested: the amount of memory in MiB to check for
4320 @type hypervisor_name: C{str}
4321 @param hypervisor_name: the hypervisor to ask for memory stats
4322 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4323 we cannot check the node
4326 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4327 nodeinfo[node].Raise("Can't get data from node %s" % node,
4328 prereq=True, ecode=errors.ECODE_ENVIRON)
4329 free_mem = nodeinfo[node].payload.get('memory_free', None)
4330 if not isinstance(free_mem, int):
4331 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4332 " was '%s'" % (node, free_mem),
4333 errors.ECODE_ENVIRON)
4334 if requested > free_mem:
4335 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4336 " needed %s MiB, available %s MiB" %
4337 (node, reason, requested, free_mem),
4341 def _CheckNodesFreeDisk(lu, nodenames, requested):
4342 """Checks if nodes have enough free disk space in the default VG.
4344 This function checks if all given nodes have the needed amount of
4345 free disk. In case any node has less disk or we cannot get the
4346 information from the node, this function raises an OpPrereqError
4349 @type lu: C{LogicalUnit}
4350 @param lu: a logical unit from which we get configuration data
4351 @type nodenames: C{list}
4352 @param nodenames: the list of node names to check
4353 @type requested: C{int}
4354 @param requested: the amount of disk in MiB to check for
4355 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4356 we cannot check the node
4359 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4360 lu.cfg.GetHypervisorType())
4361 for node in nodenames:
4362 info = nodeinfo[node]
4363 info.Raise("Cannot get current information from node %s" % node,
4364 prereq=True, ecode=errors.ECODE_ENVIRON)
4365 vg_free = info.payload.get("vg_free", None)
4366 if not isinstance(vg_free, int):
4367 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4368 " result was '%s'" % (node, vg_free),
4369 errors.ECODE_ENVIRON)
4370 if requested > vg_free:
4371 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4372 " required %d MiB, available %d MiB" %
4373 (node, requested, vg_free),
4377 class LUStartupInstance(LogicalUnit):
4378 """Starts an instance.
4381 HPATH = "instance-start"
4382 HTYPE = constants.HTYPE_INSTANCE
4384 ("instance_name", _TNonEmptyString),
4386 ("beparams", _TDict),
4387 ("hvparams", _TDict),
4390 ("beparams", _EmptyDict),
4391 ("hvparams", _EmptyDict),
4395 def CheckArguments(self):
4397 if self.op.beparams:
4398 # fill the beparams dict
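# (ForceDictType also coerces values that may arrive as strings from the
# client into the expected parameter types)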
4399 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4401 def ExpandNames(self):
4402 self._ExpandAndLockInstance()
4404 def BuildHooksEnv(self):
4407 This runs on master, primary and secondary nodes of the instance.
4411 "FORCE": self.op.force,
4413 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4414 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4417 def CheckPrereq(self):
4418 """Check prerequisites.
4420 This checks that the instance is in the cluster.
4423 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4424 assert self.instance is not None, \
4425 "Cannot retrieve locked instance %s" % self.op.instance_name
4428 if self.op.hvparams:
4429 # check hypervisor parameter syntax (locally)
4430 cluster = self.cfg.GetClusterInfo()
4431 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
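# build the complete parameter set by filling in the cluster/instance
# defaults and layering the temporary overrides on top, so that the
# syntax check below sees every parameter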
4432 filled_hvp = cluster.FillHV(instance)
4433 filled_hvp.update(self.op.hvparams)
4434 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4435 hv_type.CheckParameterSyntax(filled_hvp)
4436 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4438 _CheckNodeOnline(self, instance.primary_node)
4440 bep = self.cfg.GetClusterInfo().FillBE(instance)
4441 # check bridges existence
4442 _CheckInstanceBridgesExist(self, instance)
4444 remote_info = self.rpc.call_instance_info(instance.primary_node,
4446 instance.hypervisor)
4447 remote_info.Raise("Error checking node %s" % instance.primary_node,
4448 prereq=True, ecode=errors.ECODE_ENVIRON)
4449 if not remote_info.payload: # not running already
4450 _CheckNodeFreeMemory(self, instance.primary_node,
4451 "starting instance %s" % instance.name,
4452 bep[constants.BE_MEMORY], instance.hypervisor)
4454 def Exec(self, feedback_fn):
4455 """Start the instance.
4458 instance = self.instance
4459 force = self.op.force
4461 self.cfg.MarkInstanceUp(instance.name)
4463 node_current = instance.primary_node
4465 _StartInstanceDisks(self, instance, force)
4467 result = self.rpc.call_instance_start(node_current, instance,
4468 self.op.hvparams, self.op.beparams)
4469 msg = result.fail_msg
4471 _ShutdownInstanceDisks(self, instance)
4472 raise errors.OpExecError("Could not start instance: %s" % msg)
4475 class LURebootInstance(LogicalUnit):
4476 """Reboot an instance.
4479 HPATH = "instance-reboot"
4480 HTYPE = constants.HTYPE_INSTANCE
4482 ("instance_name", _TNonEmptyString),
4483 ("ignore_secondaries", _TBool),
4484 ("reboot_type", _TElemOf(constants.REBOOT_TYPES)),
4486 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4489 def ExpandNames(self):
4490 self._ExpandAndLockInstance()
4492 def BuildHooksEnv(self):
4495 This runs on master, primary and secondary nodes of the instance.
4499 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4500 "REBOOT_TYPE": self.op.reboot_type,
4501 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4503 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4504 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4507 def CheckPrereq(self):
4508 """Check prerequisites.
4510 This checks that the instance is in the cluster.
4513 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4514 assert self.instance is not None, \
4515 "Cannot retrieve locked instance %s" % self.op.instance_name
4517 _CheckNodeOnline(self, instance.primary_node)
4519 # check bridges existence
4520 _CheckInstanceBridgesExist(self, instance)
4522 def Exec(self, feedback_fn):
4523 """Reboot the instance.
4526 instance = self.instance
4527 ignore_secondaries = self.op.ignore_secondaries
4528 reboot_type = self.op.reboot_type
4530 node_current = instance.primary_node
4532 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4533 constants.INSTANCE_REBOOT_HARD]:
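# soft/hard reboots are performed by the hypervisor on the primary node;
# set the disk IDs for that node first so the backend can find the devices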
4534 for disk in instance.disks:
4535 self.cfg.SetDiskID(disk, node_current)
4536 result = self.rpc.call_instance_reboot(node_current, instance,
4538 self.op.shutdown_timeout)
4539 result.Raise("Could not reboot instance")
4541 result = self.rpc.call_instance_shutdown(node_current, instance,
4542 self.op.shutdown_timeout)
4543 result.Raise("Could not shutdown instance for full reboot")
4544 _ShutdownInstanceDisks(self, instance)
4545 _StartInstanceDisks(self, instance, ignore_secondaries)
4546 result = self.rpc.call_instance_start(node_current, instance, None, None)
4547 msg = result.fail_msg
4549 _ShutdownInstanceDisks(self, instance)
4550 raise errors.OpExecError("Could not start instance for"
4551 " full reboot: %s" % msg)
4553 self.cfg.MarkInstanceUp(instance.name)
4556 class LUShutdownInstance(LogicalUnit):
4557 """Shutdown an instance.
4560 HPATH = "instance-stop"
4561 HTYPE = constants.HTYPE_INSTANCE
4562 _OP_REQP = [("instance_name", _TNonEmptyString)]
4563 _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4566 def ExpandNames(self):
4567 self._ExpandAndLockInstance()
4569 def BuildHooksEnv(self):
4572 This runs on master, primary and secondary nodes of the instance.
4575 env = _BuildInstanceHookEnvByObject(self, self.instance)
4576 env["TIMEOUT"] = self.op.timeout
4577 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4580 def CheckPrereq(self):
4581 """Check prerequisites.
4583 This checks that the instance is in the cluster.
4586 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4587 assert self.instance is not None, \
4588 "Cannot retrieve locked instance %s" % self.op.instance_name
4589 _CheckNodeOnline(self, self.instance.primary_node)
4591 def Exec(self, feedback_fn):
4592 """Shutdown the instance.
4595 instance = self.instance
4596 node_current = instance.primary_node
4597 timeout = self.op.timeout
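# mark the instance as administratively down before issuing the shutdown
# RPC, so that even if the call fails the cluster no longer considers the
# instance as supposed to be running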
4598 self.cfg.MarkInstanceDown(instance.name)
4599 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4600 msg = result.fail_msg
4602 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4604 _ShutdownInstanceDisks(self, instance)
4607 class LUReinstallInstance(LogicalUnit):
4608 """Reinstall an instance.
4611 HPATH = "instance-reinstall"
4612 HTYPE = constants.HTYPE_INSTANCE
4613 _OP_REQP = [("instance_name", _TNonEmptyString)]
4616 ("force_variant", False),
4620 def ExpandNames(self):
4621 self._ExpandAndLockInstance()
4623 def BuildHooksEnv(self):
4626 This runs on master, primary and secondary nodes of the instance.
4629 env = _BuildInstanceHookEnvByObject(self, self.instance)
4630 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4633 def CheckPrereq(self):
4634 """Check prerequisites.
4636 This checks that the instance is in the cluster and is not running.
4639 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4640 assert instance is not None, \
4641 "Cannot retrieve locked instance %s" % self.op.instance_name
4642 _CheckNodeOnline(self, instance.primary_node)
4644 if instance.disk_template == constants.DT_DISKLESS:
4645 raise errors.OpPrereqError("Instance '%s' has no disks" %
4646 self.op.instance_name,
4648 _CheckInstanceDown(self, instance, "cannot reinstall")
4650 if self.op.os_type is not None:
4652 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4653 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4655 self.instance = instance
4657 def Exec(self, feedback_fn):
4658 """Reinstall the instance.
4661 inst = self.instance
4663 if self.op.os_type is not None:
4664 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4665 inst.os = self.op.os_type
4666 self.cfg.Update(inst, feedback_fn)
4668 _StartInstanceDisks(self, inst, None)
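# the OS create scripts need the instance's disks to be visible on the
# primary node, hence the activation above; they are shut down again once
# the reinstall finishes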
4670 feedback_fn("Running the instance OS create scripts...")
4671 # FIXME: pass debug option from opcode to backend
4672 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4673 self.op.debug_level)
4674 result.Raise("Could not install OS for instance %s on node %s" %
4675 (inst.name, inst.primary_node))
4677 _ShutdownInstanceDisks(self, inst)
4680 class LURecreateInstanceDisks(LogicalUnit):
4681 """Recreate an instance's missing disks.
4684 HPATH = "instance-recreate-disks"
4685 HTYPE = constants.HTYPE_INSTANCE
4687 ("instance_name", _TNonEmptyString),
4688 ("disks", _TListOf(_TPositiveInt)),
4692 def ExpandNames(self):
4693 self._ExpandAndLockInstance()
4695 def BuildHooksEnv(self):
4698 This runs on master, primary and secondary nodes of the instance.
4701 env = _BuildInstanceHookEnvByObject(self, self.instance)
4702 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4705 def CheckPrereq(self):
4706 """Check prerequisites.
4708 This checks that the instance is in the cluster and is not running.
4711 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4712 assert instance is not None, \
4713 "Cannot retrieve locked instance %s" % self.op.instance_name
4714 _CheckNodeOnline(self, instance.primary_node)
4716 if instance.disk_template == constants.DT_DISKLESS:
4717 raise errors.OpPrereqError("Instance '%s' has no disks" %
4718 self.op.instance_name, errors.ECODE_INVAL)
4719 _CheckInstanceDown(self, instance, "cannot recreate disks")
4721 if not self.op.disks:
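# an empty disk list means "recreate all of the instance's disks"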
4722 self.op.disks = range(len(instance.disks))
4724 for idx in self.op.disks:
4725 if idx >= len(instance.disks):
4726 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4729 self.instance = instance
4731 def Exec(self, feedback_fn):
4732 """Recreate the disks.
4736 for idx, _ in enumerate(self.instance.disks):
4737 if idx not in self.op.disks: # disk idx has not been passed in
4741 _CreateDisks(self, self.instance, to_skip=to_skip)
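# only the requested disk indices are recreated; everything else is
# passed to _CreateDisks via to_skip and left untouched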
4744 class LURenameInstance(LogicalUnit):
4745 """Rename an instance.
4748 HPATH = "instance-rename"
4749 HTYPE = constants.HTYPE_INSTANCE
4751 ("instance_name", _TNonEmptyString),
4752 ("new_name", _TNonEmptyString),
4753 ("ignore_ip", _TBool),
4754 ("check_name", _TBool),
4756 _OP_DEFS = [("ignore_ip", False), ("check_name", True)]
4758 def BuildHooksEnv(self):
4761 This runs on master, primary and secondary nodes of the instance.
4764 env = _BuildInstanceHookEnvByObject(self, self.instance)
4765 env["INSTANCE_NEW_NAME"] = self.op.new_name
4766 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4769 def CheckPrereq(self):
4770 """Check prerequisites.
4772 This checks that the instance is in the cluster and is not running.
4775 self.op.instance_name = _ExpandInstanceName(self.cfg,
4776 self.op.instance_name)
4777 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4778 assert instance is not None
4779 _CheckNodeOnline(self, instance.primary_node)
4780 _CheckInstanceDown(self, instance, "cannot rename")
4781 self.instance = instance
4783 # new name verification
4784 if self.op.check_name:
4785 name_info = utils.GetHostInfo(self.op.new_name)
4786 self.op.new_name = name_info.name
4788 new_name = self.op.new_name
4790 instance_list = self.cfg.GetInstanceList()
4791 if new_name in instance_list:
4792 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4793 new_name, errors.ECODE_EXISTS)
4795 if not self.op.ignore_ip:
4796 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4797 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4798 (name_info.ip, new_name),
4799 errors.ECODE_NOTUNIQUE)
4801 def Exec(self, feedback_fn):
4802 """Reinstall the instance.
4805 inst = self.instance
4806 old_name = inst.name
4808 if inst.disk_template == constants.DT_FILE:
4809 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4811 self.cfg.RenameInstance(inst.name, self.op.new_name)
4812 # Change the instance lock. This is definitely safe while we hold the BGL
4813 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4814 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4816 # re-read the instance from the configuration after rename
4817 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4819 if inst.disk_template == constants.DT_FILE:
4820 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4821 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4822 old_file_storage_dir,
4823 new_file_storage_dir)
4824 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4825 " (but the instance has been renamed in Ganeti)" %
4826 (inst.primary_node, old_file_storage_dir,
4827 new_file_storage_dir))
4829 _StartInstanceDisks(self, inst, None)
4831 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4832 old_name, self.op.debug_level)
4833 msg = result.fail_msg
4835 msg = ("Could not run OS rename script for instance %s on node %s"
4836 " (but the instance has been renamed in Ganeti): %s" %
4837 (inst.name, inst.primary_node, msg))
4838 self.proc.LogWarning(msg)
4840 _ShutdownInstanceDisks(self, inst)
4843 class LURemoveInstance(LogicalUnit):
4844 """Remove an instance.
4847 HPATH = "instance-remove"
4848 HTYPE = constants.HTYPE_INSTANCE
4850 ("instance_name", _TNonEmptyString),
4851 ("ignore_failures", _TBool),
4853 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4856 def ExpandNames(self):
4857 self._ExpandAndLockInstance()
4858 self.needed_locks[locking.LEVEL_NODE] = []
4859 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4861 def DeclareLocks(self, level):
4862 if level == locking.LEVEL_NODE:
4863 self._LockInstancesNodes()
4865 def BuildHooksEnv(self):
4868 This runs on master, primary and secondary nodes of the instance.
4871 env = _BuildInstanceHookEnvByObject(self, self.instance)
4872 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4873 nl = [self.cfg.GetMasterNode()]
4874 nl_post = list(self.instance.all_nodes) + nl
4875 return env, nl, nl_post
4877 def CheckPrereq(self):
4878 """Check prerequisites.
4880 This checks that the instance is in the cluster.
4883 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4884 assert self.instance is not None, \
4885 "Cannot retrieve locked instance %s" % self.op.instance_name
4887 def Exec(self, feedback_fn):
4888 """Remove the instance.
4891 instance = self.instance
4892 logging.info("Shutting down instance %s on node %s",
4893 instance.name, instance.primary_node)
4895 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4896 self.op.shutdown_timeout)
4897 msg = result.fail_msg
4899 if self.op.ignore_failures:
4900 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4902 raise errors.OpExecError("Could not shutdown instance %s on"
4904 (instance.name, instance.primary_node, msg))
4906 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4909 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4910 """Utility function to remove an instance.
4913 logging.info("Removing block devices for instance %s", instance.name)
4915 if not _RemoveDisks(lu, instance):
4916 if not ignore_failures:
4917 raise errors.OpExecError("Can't remove instance's disks")
4918 feedback_fn("Warning: can't remove instance's disks")
4920 logging.info("Removing instance %s out of cluster config", instance.name)
4922 lu.cfg.RemoveInstance(instance.name)
4924 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4925 "Instance lock removal conflict"
4927 # Remove lock for the instance
4928 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4931 class LUQueryInstances(NoHooksLU):
4932 """Logical unit for querying instances.
4935 # pylint: disable-msg=W0142
4937 ("output_fields", _TListOf(_TNonEmptyString)),
4938 ("names", _TListOf(_TNonEmptyString)),
4939 ("use_locking", _TBool),
4942 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4943 "serial_no", "ctime", "mtime", "uuid"]
4944 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4946 "disk_template", "ip", "mac", "bridge",
4947 "nic_mode", "nic_link",
4948 "sda_size", "sdb_size", "vcpus", "tags",
4949 "network_port", "beparams",
4950 r"(disk)\.(size)/([0-9]+)",
4951 r"(disk)\.(sizes)", "disk_usage",
4952 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4953 r"(nic)\.(bridge)/([0-9]+)",
4954 r"(nic)\.(macs|ips|modes|links|bridges)",
4955 r"(disk|nic)\.(count)",
4957 ] + _SIMPLE_FIELDS +
4959 for name in constants.HVS_PARAMETERS
4960 if name not in constants.HVC_GLOBALS] +
4962 for name in constants.BES_PARAMETERS])
4963 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4966 def CheckArguments(self):
4967 _CheckOutputFields(static=self._FIELDS_STATIC,
4968 dynamic=self._FIELDS_DYNAMIC,
4969 selected=self.op.output_fields)
4971 def ExpandNames(self):
4972 self.needed_locks = {}
4973 self.share_locks[locking.LEVEL_INSTANCE] = 1
4974 self.share_locks[locking.LEVEL_NODE] = 1
4977 self.wanted = _GetWantedInstances(self, self.op.names)
4979 self.wanted = locking.ALL_SET
4981 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
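# live data from the nodes (and therefore locking) is only needed if at
# least one requested field is not a purely static/config field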
4982 self.do_locking = self.do_node_query and self.op.use_locking
4984 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4985 self.needed_locks[locking.LEVEL_NODE] = []
4986 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4988 def DeclareLocks(self, level):
4989 if level == locking.LEVEL_NODE and self.do_locking:
4990 self._LockInstancesNodes()
4992 def Exec(self, feedback_fn):
4993 """Computes the list of nodes and their attributes.
4996 # pylint: disable-msg=R0912
4997 # way too many branches here
4998 all_info = self.cfg.GetAllInstancesInfo()
4999 if self.wanted == locking.ALL_SET:
5000 # caller didn't specify instance names, so ordering is not important
5002 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5004 instance_names = all_info.keys()
5005 instance_names = utils.NiceSort(instance_names)
5007 # caller did specify names, so we must keep the ordering
5009 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5011 tgt_set = all_info.keys()
5012 missing = set(self.wanted).difference(tgt_set)
5014 raise errors.OpExecError("Some instances were removed before"
5015 " retrieving their data: %s" % missing)
5016 instance_names = self.wanted
5018 instance_list = [all_info[iname] for iname in instance_names]
5020 # begin data gathering
5022 nodes = frozenset([inst.primary_node for inst in instance_list])
5023 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5027 if self.do_node_query:
5029 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5031 result = node_data[name]
5033 # offline nodes will be in both lists
5034 off_nodes.append(name)
5036 bad_nodes.append(name)
5039 live_data.update(result.payload)
5040 # else no instance is alive
5042 live_data = dict([(name, {}) for name in instance_names])
5044 # end data gathering
5049 cluster = self.cfg.GetClusterInfo()
5050 for instance in instance_list:
5052 i_hv = cluster.FillHV(instance, skip_globals=True)
5053 i_be = cluster.FillBE(instance)
5054 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5055 for field in self.op.output_fields:
5056 st_match = self._FIELDS_STATIC.Matches(field)
5057 if field in self._SIMPLE_FIELDS:
5058 val = getattr(instance, field)
5059 elif field == "pnode":
5060 val = instance.primary_node
5061 elif field == "snodes":
5062 val = list(instance.secondary_nodes)
5063 elif field == "admin_state":
5064 val = instance.admin_up
5065 elif field == "oper_state":
5066 if instance.primary_node in bad_nodes:
5069 val = bool(live_data.get(instance.name))
5070 elif field == "status":
5071 if instance.primary_node in off_nodes:
5072 val = "ERROR_nodeoffline"
5073 elif instance.primary_node in bad_nodes:
5074 val = "ERROR_nodedown"
5076 running = bool(live_data.get(instance.name))
5078 if instance.admin_up:
5083 if instance.admin_up:
5087 elif field == "oper_ram":
5088 if instance.primary_node in bad_nodes:
5090 elif instance.name in live_data:
5091 val = live_data[instance.name].get("memory", "?")
5094 elif field == "vcpus":
5095 val = i_be[constants.BE_VCPUS]
5096 elif field == "disk_template":
5097 val = instance.disk_template
5100 val = instance.nics[0].ip
5103 elif field == "nic_mode":
5105 val = i_nicp[0][constants.NIC_MODE]
5108 elif field == "nic_link":
5110 val = i_nicp[0][constants.NIC_LINK]
5113 elif field == "bridge":
5114 if (instance.nics and
5115 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5116 val = i_nicp[0][constants.NIC_LINK]
5119 elif field == "mac":
5121 val = instance.nics[0].mac
5124 elif field == "sda_size" or field == "sdb_size":
5125 idx = ord(field[2]) - ord('a')
5127 val = instance.FindDisk(idx).size
5128 except errors.OpPrereqError:
5130 elif field == "disk_usage": # total disk usage per node
5131 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5132 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5133 elif field == "tags":
5134 val = list(instance.GetTags())
5135 elif field == "hvparams":
5137 elif (field.startswith(HVPREFIX) and
5138 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5139 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5140 val = i_hv.get(field[len(HVPREFIX):], None)
5141 elif field == "beparams":
5143 elif (field.startswith(BEPREFIX) and
5144 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5145 val = i_be.get(field[len(BEPREFIX):], None)
5146 elif st_match and st_match.groups():
5147 # matches a variable list
5148 st_groups = st_match.groups()
5149 if st_groups and st_groups[0] == "disk":
5150 if st_groups[1] == "count":
5151 val = len(instance.disks)
5152 elif st_groups[1] == "sizes":
5153 val = [disk.size for disk in instance.disks]
5154 elif st_groups[1] == "size":
5156 val = instance.FindDisk(st_groups[2]).size
5157 except errors.OpPrereqError:
5160 assert False, "Unhandled disk parameter"
5161 elif st_groups[0] == "nic":
5162 if st_groups[1] == "count":
5163 val = len(instance.nics)
5164 elif st_groups[1] == "macs":
5165 val = [nic.mac for nic in instance.nics]
5166 elif st_groups[1] == "ips":
5167 val = [nic.ip for nic in instance.nics]
5168 elif st_groups[1] == "modes":
5169 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5170 elif st_groups[1] == "links":
5171 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5172 elif st_groups[1] == "bridges":
5175 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5176 val.append(nicp[constants.NIC_LINK])
5181 nic_idx = int(st_groups[2])
5182 if nic_idx >= len(instance.nics):
5185 if st_groups[1] == "mac":
5186 val = instance.nics[nic_idx].mac
5187 elif st_groups[1] == "ip":
5188 val = instance.nics[nic_idx].ip
5189 elif st_groups[1] == "mode":
5190 val = i_nicp[nic_idx][constants.NIC_MODE]
5191 elif st_groups[1] == "link":
5192 val = i_nicp[nic_idx][constants.NIC_LINK]
5193 elif st_groups[1] == "bridge":
5194 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5195 if nic_mode == constants.NIC_MODE_BRIDGED:
5196 val = i_nicp[nic_idx][constants.NIC_LINK]
5200 assert False, "Unhandled NIC parameter"
5202 assert False, ("Declared but unhandled variable parameter '%s'" %
5205 assert False, "Declared but unhandled parameter '%s'" % field
5212 class LUFailoverInstance(LogicalUnit):
5213 """Failover an instance.
5216 HPATH = "instance-failover"
5217 HTYPE = constants.HTYPE_INSTANCE
5219 ("instance_name", _TNonEmptyString),
5220 ("ignore_consistency", _TBool),
5222 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5225 def ExpandNames(self):
5226 self._ExpandAndLockInstance()
5227 self.needed_locks[locking.LEVEL_NODE] = []
5228 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5230 def DeclareLocks(self, level):
5231 if level == locking.LEVEL_NODE:
5232 self._LockInstancesNodes()
5234 def BuildHooksEnv(self):
5237 This runs on master, primary and secondary nodes of the instance.
5240 instance = self.instance
5241 source_node = instance.primary_node
5242 target_node = instance.secondary_nodes[0]
5244 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5245 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5246 "OLD_PRIMARY": source_node,
5247 "OLD_SECONDARY": target_node,
5248 "NEW_PRIMARY": target_node,
5249 "NEW_SECONDARY": source_node,
5251 env.update(_BuildInstanceHookEnvByObject(self, instance))
5252 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5254 nl_post.append(source_node)
5255 return env, nl, nl_post
5257 def CheckPrereq(self):
5258 """Check prerequisites.
5260 This checks that the instance is in the cluster.
5263 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5264 assert self.instance is not None, \
5265 "Cannot retrieve locked instance %s" % self.op.instance_name
5267 bep = self.cfg.GetClusterInfo().FillBE(instance)
5268 if instance.disk_template not in constants.DTS_NET_MIRROR:
5269 raise errors.OpPrereqError("Instance's disk layout is not"
5270 " network mirrored, cannot failover.",
5273 secondary_nodes = instance.secondary_nodes
5274 if not secondary_nodes:
5275 raise errors.ProgrammerError("no secondary node but using "
5276 "a mirrored disk template")
5278 target_node = secondary_nodes[0]
5279 _CheckNodeOnline(self, target_node)
5280 _CheckNodeNotDrained(self, target_node)
5281 if instance.admin_up:
5282 # check memory requirements on the secondary node
5283 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5284 instance.name, bep[constants.BE_MEMORY],
5285 instance.hypervisor)
5287 self.LogInfo("Not checking memory on the secondary node as"
5288 " instance will not be started")
5290 # check bridge existence
5291 _CheckInstanceBridgesExist(self, instance, node=target_node)
5293 def Exec(self, feedback_fn):
5294 """Failover an instance.
5296 The failover is done by shutting it down on its present node and
5297 starting it on the secondary.
5300 instance = self.instance
5302 source_node = instance.primary_node
5303 target_node = instance.secondary_nodes[0]
5305 if instance.admin_up:
5306 feedback_fn("* checking disk consistency between source and target")
5307 for dev in instance.disks:
5308 # for drbd, these are drbd over lvm
5309 if not _CheckDiskConsistency(self, dev, target_node, False):
5310 if not self.op.ignore_consistency:
5311 raise errors.OpExecError("Disk %s is degraded on target node,"
5312 " aborting failover." % dev.iv_name)
5314 feedback_fn("* not checking disk consistency as instance is not running")
5316 feedback_fn("* shutting down instance on source node")
5317 logging.info("Shutting down instance %s on node %s",
5318 instance.name, source_node)
5320 result = self.rpc.call_instance_shutdown(source_node, instance,
5321 self.op.shutdown_timeout)
5322 msg = result.fail_msg
5324 if self.op.ignore_consistency:
5325 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5326 " Proceeding anyway. Please make sure node"
5327 " %s is down. Error details: %s",
5328 instance.name, source_node, source_node, msg)
5330 raise errors.OpExecError("Could not shutdown instance %s on"
5332 (instance.name, source_node, msg))
5334 feedback_fn("* deactivating the instance's disks on source node")
5335 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5336 raise errors.OpExecError("Can't shut down the instance's disks.")
5338 instance.primary_node = target_node
5339 # distribute new instance config to the other nodes
5340 self.cfg.Update(instance, feedback_fn)
5342 # Only start the instance if it's marked as up
5343 if instance.admin_up:
5344 feedback_fn("* activating the instance's disks on target node")
5345 logging.info("Starting instance %s on node %s",
5346 instance.name, target_node)
5348 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5349 ignore_secondaries=True)
5351 _ShutdownInstanceDisks(self, instance)
5352 raise errors.OpExecError("Can't activate the instance's disks")
5354 feedback_fn("* starting the instance on the target node")
5355 result = self.rpc.call_instance_start(target_node, instance, None, None)
5356 msg = result.fail_msg
5358 _ShutdownInstanceDisks(self, instance)
5359 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5360 (instance.name, target_node, msg))
5363 class LUMigrateInstance(LogicalUnit):
5364 """Migrate an instance.
5366 This is migration without shutting the instance down, as opposed to
5367 failover, which requires the instance to be shut down.
5370 HPATH = "instance-migrate"
5371 HTYPE = constants.HTYPE_INSTANCE
5373 ("instance_name", _TNonEmptyString),
5375 ("cleanup", _TBool),
5380 def ExpandNames(self):
5381 self._ExpandAndLockInstance()
5383 self.needed_locks[locking.LEVEL_NODE] = []
5384 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5386 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5387 self.op.live, self.op.cleanup)
5388 self.tasklets = [self._migrater]
5390 def DeclareLocks(self, level):
5391 if level == locking.LEVEL_NODE:
5392 self._LockInstancesNodes()
5394 def BuildHooksEnv(self):
5397 This runs on master, primary and secondary nodes of the instance.
5400 instance = self._migrater.instance
5401 source_node = instance.primary_node
5402 target_node = instance.secondary_nodes[0]
5403 env = _BuildInstanceHookEnvByObject(self, instance)
5404 env["MIGRATE_LIVE"] = self.op.live
5405 env["MIGRATE_CLEANUP"] = self.op.cleanup
5407 "OLD_PRIMARY": source_node,
5408 "OLD_SECONDARY": target_node,
5409 "NEW_PRIMARY": target_node,
5410 "NEW_SECONDARY": source_node,
5412 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5414 nl_post.append(source_node)
5415 return env, nl, nl_post
5418 class LUMoveInstance(LogicalUnit):
5419 """Move an instance by data-copying.
5422 HPATH = "instance-move"
5423 HTYPE = constants.HTYPE_INSTANCE
5425 ("instance_name", _TNonEmptyString),
5426 ("target_node", _TNonEmptyString),
5428 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5431 def ExpandNames(self):
5432 self._ExpandAndLockInstance()
5433 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5434 self.op.target_node = target_node
5435 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5436 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5438 def DeclareLocks(self, level):
5439 if level == locking.LEVEL_NODE:
5440 self._LockInstancesNodes(primary_only=True)
5442 def BuildHooksEnv(self):
5445 This runs on master, primary and secondary nodes of the instance.
5449 "TARGET_NODE": self.op.target_node,
5450 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5452 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5453 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5454 self.op.target_node]
5457 def CheckPrereq(self):
5458 """Check prerequisites.
5460 This checks that the instance is in the cluster.
5463 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5464 assert self.instance is not None, \
5465 "Cannot retrieve locked instance %s" % self.op.instance_name
5467 node = self.cfg.GetNodeInfo(self.op.target_node)
5468 assert node is not None, \
5469 "Cannot retrieve locked node %s" % self.op.target_node
5471 self.target_node = target_node = node.name
5473 if target_node == instance.primary_node:
5474 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5475 (instance.name, target_node),
5478 bep = self.cfg.GetClusterInfo().FillBE(instance)
5480 for idx, dsk in enumerate(instance.disks):
5481 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5482 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5483 " cannot copy" % idx, errors.ECODE_STATE)
5485 _CheckNodeOnline(self, target_node)
5486 _CheckNodeNotDrained(self, target_node)
5488 if instance.admin_up:
5489 # check memory requirements on the secondary node
5490 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5491 instance.name, bep[constants.BE_MEMORY],
5492 instance.hypervisor)
5494 self.LogInfo("Not checking memory on the secondary node as"
5495 " instance will not be started")
5497 # check bridge existence
5498 _CheckInstanceBridgesExist(self, instance, node=target_node)
5500 def Exec(self, feedback_fn):
5501 """Move an instance.
5503 The move is done by shutting it down on its present node, copying
5504 the data over (slow) and starting it on the new node.
5507 instance = self.instance
5509 source_node = instance.primary_node
5510 target_node = self.target_node
5512 self.LogInfo("Shutting down instance %s on source node %s",
5513 instance.name, source_node)
5515 result = self.rpc.call_instance_shutdown(source_node, instance,
5516 self.op.shutdown_timeout)
5517 msg = result.fail_msg
5519 if self.op.ignore_consistency:
5520 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5521 " Proceeding anyway. Please make sure node"
5522 " %s is down. Error details: %s",
5523 instance.name, source_node, source_node, msg)
5525 raise errors.OpExecError("Could not shutdown instance %s on"
5527 (instance.name, source_node, msg))
5529 # create the target disks
5531 _CreateDisks(self, instance, target_node=target_node)
5532 except errors.OpExecError:
5533 self.LogWarning("Device creation failed, reverting...")
5535 _RemoveDisks(self, instance, target_node=target_node)
5537 self.cfg.ReleaseDRBDMinors(instance.name)
5540 cluster_name = self.cfg.GetClusterInfo().cluster_name
5543 # activate, get path, copy the data over
5544 for idx, disk in enumerate(instance.disks):
5545 self.LogInfo("Copying data for disk %d", idx)
5546 result = self.rpc.call_blockdev_assemble(target_node, disk,
5547 instance.name, True)
5549 self.LogWarning("Can't assemble newly created disk %d: %s",
5550 idx, result.fail_msg)
5551 errs.append(result.fail_msg)
5553 dev_path = result.payload
5554 result = self.rpc.call_blockdev_export(source_node, disk,
5555 target_node, dev_path,
5558 self.LogWarning("Can't copy data over for disk %d: %s",
5559 idx, result.fail_msg)
5560 errs.append(result.fail_msg)
5564 self.LogWarning("Some disks failed to copy, aborting")
5566 _RemoveDisks(self, instance, target_node=target_node)
5568 self.cfg.ReleaseDRBDMinors(instance.name)
5569 raise errors.OpExecError("Errors during disk copy: %s" %
5572 instance.primary_node = target_node
5573 self.cfg.Update(instance, feedback_fn)
5575 self.LogInfo("Removing the disks on the original node")
5576 _RemoveDisks(self, instance, target_node=source_node)
5578 # Only start the instance if it's marked as up
5579 if instance.admin_up:
5580 self.LogInfo("Starting instance %s on node %s",
5581 instance.name, target_node)
5583 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5584 ignore_secondaries=True)
5586 _ShutdownInstanceDisks(self, instance)
5587 raise errors.OpExecError("Can't activate the instance's disks")
5589 result = self.rpc.call_instance_start(target_node, instance, None, None)
5590 msg = result.fail_msg
5592 _ShutdownInstanceDisks(self, instance)
5593 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5594 (instance.name, target_node, msg))
5597 class LUMigrateNode(LogicalUnit):
5598 """Migrate all instances from a node.
5601 HPATH = "node-migrate"
5602 HTYPE = constants.HTYPE_NODE
5604 ("node_name", _TNonEmptyString),
5609 def ExpandNames(self):
5610 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5612 self.needed_locks = {
5613 locking.LEVEL_NODE: [self.op.node_name],
5616 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5618 # Create tasklets for migrating instances for all instances on this node
5622 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5623 logging.debug("Migrating instance %s", inst.name)
5624 names.append(inst.name)
5626 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5628 self.tasklets = tasklets
5630 # Declare instance locks
5631 self.needed_locks[locking.LEVEL_INSTANCE] = names
5633 def DeclareLocks(self, level):
5634 if level == locking.LEVEL_NODE:
5635 self._LockInstancesNodes()
5637 def BuildHooksEnv(self):
5640 This runs on the master, the primary and all the secondaries.
5644 "NODE_NAME": self.op.node_name,
5647 nl = [self.cfg.GetMasterNode()]
5649 return (env, nl, nl)
5652 class TLMigrateInstance(Tasklet):
5653 def __init__(self, lu, instance_name, live, cleanup):
5654 """Initializes this class.
5657 Tasklet.__init__(self, lu)
5660 self.instance_name = instance_name
5662 self.cleanup = cleanup
5664 def CheckPrereq(self):
5665 """Check prerequisites.
5667 This checks that the instance is in the cluster.
5670 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5671 instance = self.cfg.GetInstanceInfo(instance_name)
5672 assert instance is not None
5674 if instance.disk_template != constants.DT_DRBD8:
5675 raise errors.OpPrereqError("Instance's disk layout is not"
5676 " drbd8, cannot migrate.", errors.ECODE_STATE)
5678 secondary_nodes = instance.secondary_nodes
5679 if not secondary_nodes:
5680 raise errors.ConfigurationError("No secondary node but using"
5681 " drbd8 disk template")
5683 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5685 target_node = secondary_nodes[0]
5686 # check memory requirements on the secondary node
5687 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5688 instance.name, i_be[constants.BE_MEMORY],
5689 instance.hypervisor)
5691 # check bridge existence
5692 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5694 if not self.cleanup:
5695 _CheckNodeNotDrained(self.lu, target_node)
5696 result = self.rpc.call_instance_migratable(instance.primary_node,
5698 result.Raise("Can't migrate, please use failover",
5699 prereq=True, ecode=errors.ECODE_STATE)
5701 self.instance = instance
5703 def _WaitUntilSync(self):
5704 """Poll with custom rpc for disk sync.
5706 This uses our own step-based rpc call.
5709 self.feedback_fn("* wait until resync is done")
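# poll all involved nodes and track the slowest disk; keep going until
# every node reports the resync as finished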
5713 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5715 self.instance.disks)
5717 for node, nres in result.items():
5718 nres.Raise("Cannot resync disks on node %s" % node)
5719 node_done, node_percent = nres.payload
5720 all_done = all_done and node_done
5721 if node_percent is not None:
5722 min_percent = min(min_percent, node_percent)
5724 if min_percent < 100:
5725 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5728 def _EnsureSecondary(self, node):
5729 """Demote a node to secondary.
5732 self.feedback_fn("* switching node %s to secondary mode" % node)
5734 for dev in self.instance.disks:
5735 self.cfg.SetDiskID(dev, node)
5737 result = self.rpc.call_blockdev_close(node, self.instance.name,
5738 self.instance.disks)
5739 result.Raise("Cannot change disk to secondary on node %s" % node)
5741 def _GoStandalone(self):
5742 """Disconnect from the network.
5745 self.feedback_fn("* changing into standalone mode")
5746 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5747 self.instance.disks)
5748 for node, nres in result.items():
5749 nres.Raise("Cannot disconnect disks node %s" % node)
5751 def _GoReconnect(self, multimaster):
5752 """Reconnect to the network.
5758 msg = "single-master"
5759 self.feedback_fn("* changing disks into %s mode" % msg)
5760 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5761 self.instance.disks,
5762 self.instance.name, multimaster)
5763 for node, nres in result.items():
5764 nres.Raise("Cannot change disks config on node %s" % node)
5766 def _ExecCleanup(self):
5767 """Try to cleanup after a failed migration.
5769 The cleanup is done by:
5770 - check that the instance is running only on one node
5771 (and update the config if needed)
5772 - change disks on its secondary node to secondary
5773 - wait until disks are fully synchronized
5774 - disconnect from the network
5775 - change disks into single-master mode
5776 - wait again until disks are fully synchronized
5779 instance = self.instance
5780 target_node = self.target_node
5781 source_node = self.source_node
5783 # check running on only one node
5784 self.feedback_fn("* checking where the instance actually runs"
5785 " (if this hangs, the hypervisor might be in"
5787 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5788 for node, result in ins_l.items():
5789 result.Raise("Can't contact node %s" % node)
5791 runningon_source = instance.name in ins_l[source_node].payload
5792 runningon_target = instance.name in ins_l[target_node].payload
5794 if runningon_source and runningon_target:
5795 raise errors.OpExecError("Instance seems to be running on two nodes,"
5796 " or the hypervisor is confused. You will have"
5797 " to ensure manually that it runs only on one"
5798 " and restart this operation.")
5800 if not (runningon_source or runningon_target):
5801 raise errors.OpExecError("Instance does not seem to be running at all."
5802 " In this case, it's safer to repair by"
5803 " running 'gnt-instance stop' to ensure disk"
5804 " shutdown, and then restarting it.")
5806 if runningon_target:
5807 # the migration has actually succeeded, we need to update the config
5808 self.feedback_fn("* instance running on secondary node (%s),"
5809 " updating config" % target_node)
5810 instance.primary_node = target_node
5811 self.cfg.Update(instance, self.feedback_fn)
5812 demoted_node = source_node
5814 self.feedback_fn("* instance confirmed to be running on its"
5815 " primary node (%s)" % source_node)
5816 demoted_node = target_node
5818 self._EnsureSecondary(demoted_node)
5820 self._WaitUntilSync()
5821 except errors.OpExecError:
5822 # we ignore here errors, since if the device is standalone, it
5823 # won't be able to sync
5825 self._GoStandalone()
5826 self._GoReconnect(False)
5827 self._WaitUntilSync()
5829 self.feedback_fn("* done")
5831 def _RevertDiskStatus(self):
5832 """Try to revert the disk status after a failed migration.
5835 target_node = self.target_node
5837 self._EnsureSecondary(target_node)
5838 self._GoStandalone()
5839 self._GoReconnect(False)
5840 self._WaitUntilSync()
5841 except errors.OpExecError, err:
5842 self.lu.LogWarning("Migration failed and I can't reconnect the"
5843 " drives: error '%s'\n"
5844 "Please look and recover the instance status" %
5847 def _AbortMigration(self):
5848 """Call the hypervisor code to abort a started migration.
5851 instance = self.instance
5852 target_node = self.target_node
5853 migration_info = self.migration_info
5855 abort_result = self.rpc.call_finalize_migration(target_node,
5859 abort_msg = abort_result.fail_msg
5861 logging.error("Aborting migration failed on target node %s: %s",
5862 target_node, abort_msg)
5863 # Don't raise an exception here, as we still have to try to revert the
5864 # disk status, even if this step failed.
5866 def _ExecMigration(self):
5867 """Migrate an instance.
5869 The migration is done by:
5870 - change the disks into dual-master mode
5871 - wait until disks are fully synchronized again
5872 - migrate the instance
5873 - change disks on the new secondary node (the old primary) to secondary
5874 - wait until disks are fully synchronized
5875 - change disks into single-master mode
5878 instance = self.instance
5879 target_node = self.target_node
5880 source_node = self.source_node
5882 self.feedback_fn("* checking disk consistency between source and target")
5883 for dev in instance.disks:
5884 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5885 raise errors.OpExecError("Disk %s is degraded or not fully"
5886 " synchronized on target node,"
5887 " aborting migrate." % dev.iv_name)
5889 # First get the migration information from the remote node
5890 result = self.rpc.call_migration_info(source_node, instance)
5891 msg = result.fail_msg
5893 log_err = ("Failed fetching source migration information from %s: %s" %
5895 logging.error(log_err)
5896 raise errors.OpExecError(log_err)
5898 self.migration_info = migration_info = result.payload
5900 # Then switch the disks to master/master mode
5901 self._EnsureSecondary(target_node)
5902 self._GoStandalone()
5903 self._GoReconnect(True)
5904 self._WaitUntilSync()
5906 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5907 result = self.rpc.call_accept_instance(target_node,
5910 self.nodes_ip[target_node])
5912 msg = result.fail_msg
5914 logging.error("Instance pre-migration failed, trying to revert"
5915 " disk status: %s", msg)
5916 self.feedback_fn("Pre-migration failed, aborting")
5917 self._AbortMigration()
5918 self._RevertDiskStatus()
5919 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5920 (instance.name, msg))
5922 self.feedback_fn("* migrating instance to %s" % target_node)
5924 result = self.rpc.call_instance_migrate(source_node, instance,
5925 self.nodes_ip[target_node],
5927 msg = result.fail_msg
5929 logging.error("Instance migration failed, trying to revert"
5930 " disk status: %s", msg)
5931 self.feedback_fn("Migration failed, aborting")
5932 self._AbortMigration()
5933 self._RevertDiskStatus()
5934 raise errors.OpExecError("Could not migrate instance %s: %s" %
5935 (instance.name, msg))
5938 instance.primary_node = target_node
5939 # distribute new instance config to the other nodes
5940 self.cfg.Update(instance, self.feedback_fn)
5942 result = self.rpc.call_finalize_migration(target_node,
5946 msg = result.fail_msg
5948 logging.error("Instance migration succeeded, but finalization failed:"
5950 raise errors.OpExecError("Could not finalize instance migration: %s" %
5953 self._EnsureSecondary(source_node)
5954 self._WaitUntilSync()
5955 self._GoStandalone()
5956 self._GoReconnect(False)
5957 self._WaitUntilSync()
5959 self.feedback_fn("* done")
5961 def Exec(self, feedback_fn):
5962 """Perform the migration.
5965 feedback_fn("Migrating instance %s" % self.instance.name)
5967 self.feedback_fn = feedback_fn
5969 self.source_node = self.instance.primary_node
5970 self.target_node = self.instance.secondary_nodes[0]
5971 self.all_nodes = [self.source_node, self.target_node]
5973 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5974 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5978 return self._ExecCleanup()
5980 return self._ExecMigration()
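
# Illustrative sketch, not part of the original module: the DRBD
# reconfiguration sequence the tasklet above applies around a live
# migration, written out as plain data. Each entry corresponds to one of
# the helpers used in _ExecMigration (_EnsureSecondary, _GoStandalone,
# _GoReconnect, _WaitUntilSync).
_EXAMPLE_MIGRATION_DISK_STEPS = [
  ("ensure_secondary", "target node"),     # demote the target's disks
  ("standalone", "all nodes"),             # drop the DRBD network config
  ("reconnect", "multimaster=True"),       # dual-primary for the migration
  ("wait_sync", "all nodes"),
  # the instance itself is migrated at this point
  ("ensure_secondary", "source node"),     # demote the old primary
  ("wait_sync", "all nodes"),
  ("standalone", "all nodes"),
  ("reconnect", "multimaster=False"),      # back to single-primary
  ("wait_sync", "all nodes"),
]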
5983 def _CreateBlockDev(lu, node, instance, device, force_create,
5985 """Create a tree of block devices on a given node.
5987 If this device type has to be created on secondaries, create it and all its children.
5990 If not, just recurse to children keeping the same 'force' value.
5992 @param lu: the lu on whose behalf we execute
5993 @param node: the node on which to create the device
5994 @type instance: L{objects.Instance}
5995 @param instance: the instance which owns the device
5996 @type device: L{objects.Disk}
5997 @param device: the device to create
5998 @type force_create: boolean
5999 @param force_create: whether to force creation of this device; this
6000 will be changed to True whenever we find a device whose
6001 CreateOnSecondary() method returns True
6002 @param info: the extra 'metadata' we should attach to the device
6003 (this will be represented as a LVM tag)
6004 @type force_open: boolean
6005 @param force_open: this parameter will be passed to the
6006 L{backend.BlockdevCreate} function where it specifies
6007 whether we run on primary or not, and it affects both
6008 the child assembly and the device's own Open() execution
6011 if device.CreateOnSecondary():
6015 for child in device.children:
6016 _CreateBlockDev(lu, node, instance, child, force_create,
6019 if not force_create:
6022 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6025 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6026 """Create a single block device on a given node.
6028 This will not recurse over children of the device, so they must be created in advance.
6031 @param lu: the lu on whose behalf we execute
6032 @param node: the node on which to create the device
6033 @type instance: L{objects.Instance}
6034 @param instance: the instance which owns the device
6035 @type device: L{objects.Disk}
6036 @param device: the device to create
6037 @param info: the extra 'metadata' we should attach to the device
6038 (this will be represented as a LVM tag)
6039 @type force_open: boolean
6040 @param force_open: this parameter will be passed to the
6041 L{backend.BlockdevCreate} function where it specifies
6042 whether we run on primary or not, and it affects both
6043 the child assembly and the device's own Open() execution
6046 lu.cfg.SetDiskID(device, node)
6047 result = lu.rpc.call_blockdev_create(node, device, device.size,
6048 instance.name, force_open, info)
6049 result.Raise("Can't create block device %s on"
6050 " node %s for instance %s" % (device, node, instance.name))
6051 if device.physical_id is None:
6052 device.physical_id = result.payload
6055 def _GenerateUniqueNames(lu, exts):
6056 """Generate a suitable LV name.
6058 This will generate a logical volume name for the given instance.
6063 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6064 results.append("%s%s" % (new_id, val))
6068 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6070 """Generate a drbd8 device complete with its children.
6073 port = lu.cfg.AllocatePort()
6074 vgname = lu.cfg.GetVGName()
6075 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6076 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6077 logical_id=(vgname, names[0]))
6078 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6079 logical_id=(vgname, names[1]))
6080 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6081 logical_id=(primary, secondary, port,
6084 children=[dev_data, dev_meta],
6089 def _GenerateDiskTemplate(lu, template_name,
6090 instance_name, primary_node,
6091 secondary_nodes, disk_info,
6092 file_storage_dir, file_driver,
6094 """Generate the entire disk layout for a given template type.
6097 #TODO: compute space requirements
6099 vgname = lu.cfg.GetVGName()
6100 disk_count = len(disk_info)
6102 if template_name == constants.DT_DISKLESS:
6104 elif template_name == constants.DT_PLAIN:
6105 if len(secondary_nodes) != 0:
6106 raise errors.ProgrammerError("Wrong template configuration")
6108 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6109 for i in range(disk_count)])
6110 for idx, disk in enumerate(disk_info):
6111 disk_index = idx + base_index
6112 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6113 logical_id=(vgname, names[idx]),
6114 iv_name="disk/%d" % disk_index,
6116 disks.append(disk_dev)
6117 elif template_name == constants.DT_DRBD8:
6118 if len(secondary_nodes) != 1:
6119 raise errors.ProgrammerError("Wrong template configuration")
6120 remote_node = secondary_nodes[0]
6121 minors = lu.cfg.AllocateDRBDMinor(
6122 [primary_node, remote_node] * len(disk_info), instance_name)
6125 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6126 for i in range(disk_count)]):
6127 names.append(lv_prefix + "_data")
6128 names.append(lv_prefix + "_meta")
6129 for idx, disk in enumerate(disk_info):
6130 disk_index = idx + base_index
6131 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6132 disk["size"], names[idx*2:idx*2+2],
6133 "disk/%d" % disk_index,
6134 minors[idx*2], minors[idx*2+1])
6135 disk_dev.mode = disk["mode"]
6136 disks.append(disk_dev)
6137 elif template_name == constants.DT_FILE:
6138 if len(secondary_nodes) != 0:
6139 raise errors.ProgrammerError("Wrong template configuration")
6141 _RequireFileStorage()
6143 for idx, disk in enumerate(disk_info):
6144 disk_index = idx + base_index
6145 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6146 iv_name="disk/%d" % disk_index,
6147 logical_id=(file_driver,
6148 "%s/disk%d" % (file_storage_dir,
6151 disks.append(disk_dev)
6153 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6157 def _GetInstanceInfoText(instance):
6158 """Compute that text that should be added to the disk's metadata.
6161 return "originstname+%s" % instance.name
6164 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6165 """Create all disks for an instance.
6167 This abstracts away some work from AddInstance.
6169 @type lu: L{LogicalUnit}
6170 @param lu: the logical unit on whose behalf we execute
6171 @type instance: L{objects.Instance}
6172 @param instance: the instance whose disks we should create
6174 @param to_skip: list of indices to skip
6175 @type target_node: string
6176 @param target_node: if passed, overrides the target node for creation
6178 @return: the success of the creation
6181 info = _GetInstanceInfoText(instance)
6182 if target_node is None:
6183 pnode = instance.primary_node
6184 all_nodes = instance.all_nodes
6189 if instance.disk_template == constants.DT_FILE:
6190 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6191 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6193 result.Raise("Failed to create directory '%s' on"
6194 " node %s" % (file_storage_dir, pnode))
6196 # Note: this needs to be kept in sync with adding of disks in
6197 # LUSetInstanceParams
6198 for idx, device in enumerate(instance.disks):
6199 if to_skip and idx in to_skip:
6201 logging.info("Creating volume %s for instance %s",
6202 device.iv_name, instance.name)
6204 for node in all_nodes:
6205 f_create = node == pnode
6206 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6209 def _RemoveDisks(lu, instance, target_node=None):
6210 """Remove all disks for an instance.
6212 This abstracts away some work from `AddInstance()` and
6213 `RemoveInstance()`. Note that in case some of the devices couldn't
6214 be removed, the removal will continue with the other ones (compare
6215 with `_CreateDisks()`).
6217 @type lu: L{LogicalUnit}
6218 @param lu: the logical unit on whose behalf we execute
6219 @type instance: L{objects.Instance}
6220 @param instance: the instance whose disks we should remove
6221 @type target_node: string
6222 @param target_node: used to override the node on which to remove the disks
6224 @return: the success of the removal
6227 logging.info("Removing block devices for instance %s", instance.name)
6230 for device in instance.disks:
6232 edata = [(target_node, device)]
6234 edata = device.ComputeNodeTree(instance.primary_node)
6235 for node, disk in edata:
6236 lu.cfg.SetDiskID(disk, node)
6237 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6239 lu.LogWarning("Could not remove block device %s on node %s,"
6240 " continuing anyway: %s", device.iv_name, node, msg)
6243 if instance.disk_template == constants.DT_FILE:
6244 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6248 tgt = instance.primary_node
6249 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6251 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6252 file_storage_dir, instance.primary_node, result.fail_msg)
6258 def _ComputeDiskSize(disk_template, disks):
6259 """Compute disk size requirements in the volume group
6262 # Required free disk space as a function of disk and swap space
6264 constants.DT_DISKLESS: None,
6265 constants.DT_PLAIN: sum(d["size"] for d in disks),
6266 # 128 MB are added for drbd metadata for each disk
6267 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6268 constants.DT_FILE: None,
6271 if disk_template not in req_size_dict:
6272 raise errors.ProgrammerError("Disk template '%s' size requirement"
6273 " is unknown" % disk_template)
6275 return req_size_dict[disk_template]
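
# Illustrative sketch, not part of the original module: the arithmetic
# _ComputeDiskSize performs for a made-up disk list. Plain LVM needs the
# sum of the disk sizes, DRBD8 adds 128 MB of metadata per disk, and the
# diskless/file templates need no volume group space at all.
def _ExampleDiskSizeArithmetic():
  """Worked example for _ComputeDiskSize (illustration only)."""
  disks = [{"size": 1024}, {"size": 256}]
  plain = sum(d["size"] for d in disks)        # 1280 MB
  drbd8 = sum(d["size"] + 128 for d in disks)  # 1536 MB
  return plain, drbd8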
6278 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6279 """Hypervisor parameter validation.
6281 This function abstracts the hypervisor parameter validation to be
6282 used in both instance create and instance modify.
6284 @type lu: L{LogicalUnit}
6285 @param lu: the logical unit for which we check
6286 @type nodenames: list
6287 @param nodenames: the list of nodes on which we should check
6288 @type hvname: string
6289 @param hvname: the name of the hypervisor we should use
6290 @type hvparams: dict
6291 @param hvparams: the parameters which we need to check
6292 @raise errors.OpPrereqError: if the parameters are not valid
6295 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6298 for node in nodenames:
6302 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6305 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6306 """OS parameters validation.
6308 @type lu: L{LogicalUnit}
6309 @param lu: the logical unit for which we check
6310 @type required: boolean
6311 @param required: whether the validation should fail if the OS is not found
6313 @type nodenames: list
6314 @param nodenames: the list of nodes on which we should check
6315 @type osname: string
6316 @param osname: the name of the OS we should use
6317 @type osparams: dict
6318 @param osparams: the parameters which we need to check
6319 @raise errors.OpPrereqError: if the parameters are not valid
6322 result = lu.rpc.call_os_validate(required, nodenames, osname,
6323 [constants.OS_VALIDATE_PARAMETERS],
6325 for node, nres in result.items():
6326 # we don't check for offline cases since this should be run only
6327 # against the master node and/or an instance's nodes
6328 nres.Raise("OS Parameters validation failed on node %s" % node)
6329 if not nres.payload:
6330 lu.LogInfo("OS %s not found on node %s, validation skipped",
6334 class LUCreateInstance(LogicalUnit):
6335 """Create an instance.
6338 HPATH = "instance-add"
6339 HTYPE = constants.HTYPE_INSTANCE
6341 ("instance_name", _TNonEmptyString),
6342 ("mode", _TElemOf(constants.INSTANCE_CREATE_MODES)),
6344 ("wait_for_sync", _TBool),
6345 ("ip_check", _TBool),
6346 ("disks", _TListOf(_TDict)),
6347 ("nics", _TListOf(_TDict)),
6348 ("hvparams", _TDict),
6349 ("beparams", _TDict),
6350 ("osparams", _TDict),
6353 ("name_check", True),
6354 ("no_install", False),
6356 ("force_variant", False),
6357 ("source_handshake", None),
6358 ("source_x509_ca", None),
6359 ("source_instance_name", None),
6364 ("iallocator", None),
6365 ("hypervisor", None),
6366 ("disk_template", None),
6367 ("identify_defaults", None),
6371 def CheckArguments(self):
6375 # do not require name_check to ease forward/backward compatibility
6377 if self.op.no_install and self.op.start:
6378 self.LogInfo("No-installation mode selected, disabling startup")
6379 self.op.start = False
6380 # validate/normalize the instance name
6381 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6382 if self.op.ip_check and not self.op.name_check:
6383 # TODO: make the ip check more flexible and not depend on the name check
6384 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6387 # check nics' parameter names
6388 for nic in self.op.nics:
6389 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6391 # check disks. parameter names and consistent adopt/no-adopt strategy
6392 has_adopt = has_no_adopt = False
6393 for disk in self.op.disks:
6394 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6399 if has_adopt and has_no_adopt:
6400 raise errors.OpPrereqError("Either all disks are adopted or none is",
6403 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6404 raise errors.OpPrereqError("Disk adoption is not supported for the"
6405 " '%s' disk template" %
6406 self.op.disk_template,
6408 if self.op.iallocator is not None:
6409 raise errors.OpPrereqError("Disk adoption not allowed with an"
6410 " iallocator script", errors.ECODE_INVAL)
6411 if self.op.mode == constants.INSTANCE_IMPORT:
6412 raise errors.OpPrereqError("Disk adoption not allowed for"
6413 " instance import", errors.ECODE_INVAL)
6415 self.adopt_disks = has_adopt
6417 # instance name verification
6418 if self.op.name_check:
6419 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6420 self.op.instance_name = self.hostname1.name
6421 # used in CheckPrereq for ip ping check
6422 self.check_ip = self.hostname1.ip
6423 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6424 raise errors.OpPrereqError("Remote imports require names to be checked" %
6427 self.check_ip = None
6429 # file storage checks
6430 if (self.op.file_driver and
6431 not self.op.file_driver in constants.FILE_DRIVER):
6432 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6433 self.op.file_driver, errors.ECODE_INVAL)
6435 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6436 raise errors.OpPrereqError("File storage directory path not absolute",
6439 ### Node/iallocator related checks
6440 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6441 raise errors.OpPrereqError("One and only one of iallocator and primary"
6442 " node must be given",
6445 self._cds = _GetClusterDomainSecret()
6447 if self.op.mode == constants.INSTANCE_IMPORT:
6448 # On import force_variant must be True, because if we forced it at
6449 # initial install, our only chance when importing it back is that it
6451 self.op.force_variant = True
6453 if self.op.no_install:
6454 self.LogInfo("No-installation mode has no effect during import")
6456 elif self.op.mode == constants.INSTANCE_CREATE:
6457 if self.op.os_type is None:
6458 raise errors.OpPrereqError("No guest OS specified",
6460 if self.op.disk_template is None:
6461 raise errors.OpPrereqError("No disk template specified",
6464 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6465 # Check handshake to ensure both clusters have the same domain secret
6466 src_handshake = self.op.source_handshake
6467 if not src_handshake:
6468 raise errors.OpPrereqError("Missing source handshake",
6471 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6474 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6477 # Load and check source CA
6478 self.source_x509_ca_pem = self.op.source_x509_ca
6479 if not self.source_x509_ca_pem:
6480 raise errors.OpPrereqError("Missing source X509 CA",
6484 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6486 except OpenSSL.crypto.Error, err:
6487 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6488 (err, ), errors.ECODE_INVAL)
6490 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6491 if errcode is not None:
6492 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6495 self.source_x509_ca = cert
6497 src_instance_name = self.op.source_instance_name
6498 if not src_instance_name:
6499 raise errors.OpPrereqError("Missing source instance name",
6502 self.source_instance_name = \
6503 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6506 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6507 self.op.mode, errors.ECODE_INVAL)
6509 def ExpandNames(self):
6510 """ExpandNames for CreateInstance.
6512 Figure out the right locks for instance creation.
6515 self.needed_locks = {}
6517 instance_name = self.op.instance_name
6518 # this is just a preventive check, but someone might still add this
6519 # instance in the meantime, and creation will fail at lock-add time
6520 if instance_name in self.cfg.GetInstanceList():
6521 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6522 instance_name, errors.ECODE_EXISTS)
6524 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6526 if self.op.iallocator:
6527 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6529 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6530 nodelist = [self.op.pnode]
6531 if self.op.snode is not None:
6532 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6533 nodelist.append(self.op.snode)
6534 self.needed_locks[locking.LEVEL_NODE] = nodelist
6536 # in case of import lock the source node too
6537 if self.op.mode == constants.INSTANCE_IMPORT:
6538 src_node = self.op.src_node
6539 src_path = self.op.src_path
6541 if src_path is None:
6542 self.op.src_path = src_path = self.op.instance_name
6544 if src_node is None:
6545 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6546 self.op.src_node = None
6547 if os.path.isabs(src_path):
6548 raise errors.OpPrereqError("Importing an instance from an absolute"
6549 " path requires a source node option.",
6552 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6553 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6554 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6555 if not os.path.isabs(src_path):
6556 self.op.src_path = src_path = \
6557 utils.PathJoin(constants.EXPORT_DIR, src_path)
6559 def _RunAllocator(self):
6560 """Run the allocator based on input opcode.
6563 nics = [n.ToDict() for n in self.nics]
6564 ial = IAllocator(self.cfg, self.rpc,
6565 mode=constants.IALLOCATOR_MODE_ALLOC,
6566 name=self.op.instance_name,
6567 disk_template=self.op.disk_template,
6570 vcpus=self.be_full[constants.BE_VCPUS],
6571 mem_size=self.be_full[constants.BE_MEMORY],
6574 hypervisor=self.op.hypervisor,
6577 ial.Run(self.op.iallocator)
6580 raise errors.OpPrereqError("Can't compute nodes using"
6581 " iallocator '%s': %s" %
6582 (self.op.iallocator, ial.info),
6584 if len(ial.result) != ial.required_nodes:
6585 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6586 " of nodes (%s), required %s" %
6587 (self.op.iallocator, len(ial.result),
6588 ial.required_nodes), errors.ECODE_FAULT)
6589 self.op.pnode = ial.result[0]
6590 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6591 self.op.instance_name, self.op.iallocator,
6592 utils.CommaJoin(ial.result))
6593 if ial.required_nodes == 2:
6594 self.op.snode = ial.result[1]
6596 def BuildHooksEnv(self):
6599 This runs on master, primary and secondary nodes of the instance.
6603 "ADD_MODE": self.op.mode,
6605 if self.op.mode == constants.INSTANCE_IMPORT:
6606 env["SRC_NODE"] = self.op.src_node
6607 env["SRC_PATH"] = self.op.src_path
6608 env["SRC_IMAGES"] = self.src_images
6610 env.update(_BuildInstanceHookEnv(
6611 name=self.op.instance_name,
6612 primary_node=self.op.pnode,
6613 secondary_nodes=self.secondaries,
6614 status=self.op.start,
6615 os_type=self.op.os_type,
6616 memory=self.be_full[constants.BE_MEMORY],
6617 vcpus=self.be_full[constants.BE_VCPUS],
6618 nics=_NICListToTuple(self, self.nics),
6619 disk_template=self.op.disk_template,
6620 disks=[(d["size"], d["mode"]) for d in self.disks],
6623 hypervisor_name=self.op.hypervisor,
6626 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6630 def _ReadExportInfo(self):
6631 """Reads the export information from disk.
6633 It will override the opcode source node and path with the actual
6634 information, if these two were not specified before.
6636 @return: the export information
6639 assert self.op.mode == constants.INSTANCE_IMPORT
6641 src_node = self.op.src_node
6642 src_path = self.op.src_path
6644 if src_node is None:
6645 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6646 exp_list = self.rpc.call_export_list(locked_nodes)
6648 for node in exp_list:
6649 if exp_list[node].fail_msg:
6651 if src_path in exp_list[node].payload:
6653 self.op.src_node = src_node = node
6654 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6658 raise errors.OpPrereqError("No export found for relative path %s" %
6659 src_path, errors.ECODE_INVAL)
6661 _CheckNodeOnline(self, src_node)
6662 result = self.rpc.call_export_info(src_node, src_path)
6663 result.Raise("No export or invalid export found in dir %s" % src_path)
6665 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6666 if not export_info.has_section(constants.INISECT_EXP):
6667 raise errors.ProgrammerError("Corrupted export config",
6668 errors.ECODE_ENVIRON)
6670 ei_version = export_info.get(constants.INISECT_EXP, "version")
6671 if (int(ei_version) != constants.EXPORT_VERSION):
6672 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6673 (ei_version, constants.EXPORT_VERSION),
6674 errors.ECODE_ENVIRON)
6677 def _ReadExportParams(self, einfo):
6678 """Use export parameters as defaults.
6680 In case the opcode doesn't specify (as in override) some instance
6681 parameters, then try to use them from the export information, if
6685 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6687 if self.op.disk_template is None:
6688 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6689 self.op.disk_template = einfo.get(constants.INISECT_INS,
6692 raise errors.OpPrereqError("No disk template specified and the export"
6693 " is missing the disk_template information",
6696 if not self.op.disks:
6697 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6699 # TODO: import the disk iv_name too
6700 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6701 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6702 disks.append({"size": disk_sz})
6703 self.op.disks = disks
6705 raise errors.OpPrereqError("No disk info specified and the export"
6706 " is missing the disk information",
6709 if (not self.op.nics and
6710 einfo.has_option(constants.INISECT_INS, "nic_count")):
6712 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6714 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6715 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6720 if (self.op.hypervisor is None and
6721 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6722 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6723 if einfo.has_section(constants.INISECT_HYP):
6724 # use the export parameters but do not override the ones
6725 # specified by the user
6726 for name, value in einfo.items(constants.INISECT_HYP):
6727 if name not in self.op.hvparams:
6728 self.op.hvparams[name] = value
6730 if einfo.has_section(constants.INISECT_BEP):
6731 # use the parameters, without overriding
6732 for name, value in einfo.items(constants.INISECT_BEP):
6733 if name not in self.op.beparams:
6734 self.op.beparams[name] = value
6736 # try to read the parameters old style, from the main section
6737 for name in constants.BES_PARAMETERS:
6738 if (name not in self.op.beparams and
6739 einfo.has_option(constants.INISECT_INS, name)):
6740 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6742 if einfo.has_section(constants.INISECT_OSP):
6743 # use the parameters, without overriding
6744 for name, value in einfo.items(constants.INISECT_OSP):
6745 if name not in self.op.osparams:
6746 self.op.osparams[name] = value
6748 def _RevertToDefaults(self, cluster):
6749 """Revert the instance parameters to the default values.
6753 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6754 for name in self.op.hvparams.keys():
6755 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6756 del self.op.hvparams[name]
6758 be_defs = cluster.SimpleFillBE({})
6759 for name in self.op.beparams.keys():
6760 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6761 del self.op.beparams[name]
6763 nic_defs = cluster.SimpleFillNIC({})
6764 for nic in self.op.nics:
6765 for name in constants.NICS_PARAMETERS:
6766 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6769 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6770 for name in self.op.osparams.keys():
6771 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6772 del self.op.osparams[name]
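
# Illustrative note, not part of the original code: with identify_defaults
# set, an imported instance whose exported parameters happen to match the
# current cluster defaults ends up with no explicit override for them, so
# it keeps following the cluster defaults if those are changed later; only
# values that differ from the defaults are stored on the instance.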
6774 def CheckPrereq(self):
6775 """Check prerequisites.
6778 if self.op.mode == constants.INSTANCE_IMPORT:
6779 export_info = self._ReadExportInfo()
6780 self._ReadExportParams(export_info)
6782 _CheckDiskTemplate(self.op.disk_template)
6784 if (not self.cfg.GetVGName() and
6785 self.op.disk_template not in constants.DTS_NOT_LVM):
6786 raise errors.OpPrereqError("Cluster does not support lvm-based"
6787 " instances", errors.ECODE_STATE)
6789 if self.op.hypervisor is None:
6790 self.op.hypervisor = self.cfg.GetHypervisorType()
6792 cluster = self.cfg.GetClusterInfo()
6793 enabled_hvs = cluster.enabled_hypervisors
6794 if self.op.hypervisor not in enabled_hvs:
6795 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6796 " cluster (%s)" % (self.op.hypervisor,
6797 ",".join(enabled_hvs)),
6800 # check hypervisor parameter syntax (locally)
6801 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6802 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6804 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6805 hv_type.CheckParameterSyntax(filled_hvp)
6806 self.hv_full = filled_hvp
6807 # check that we don't specify global parameters on an instance
6808 _CheckGlobalHvParams(self.op.hvparams)
6810 # fill and remember the beparams dict
6811 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6812 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6814 # build os parameters
6815 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6817 # now that hvp/bep are in final format, let's reset to defaults,
6819 if self.op.identify_defaults:
6820 self._RevertToDefaults(cluster)
6824 for idx, nic in enumerate(self.op.nics):
6825 nic_mode_req = nic.get("mode", None)
6826 nic_mode = nic_mode_req
6827 if nic_mode is None:
6828 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6830 # in routed mode, for the first nic, the default ip is 'auto'
6831 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6832 default_ip_mode = constants.VALUE_AUTO
6834 default_ip_mode = constants.VALUE_NONE
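
# Illustrative note, not part of the original code: for the first NIC of a
# routed-mode instance with no "ip" value given, the "auto" default above
# makes the address resolved during the name check (self.hostname1.ip) be
# used; all other NICs default to having no IP at all.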
6836 # ip validity checks
6837 ip = nic.get("ip", default_ip_mode)
6838 if ip is None or ip.lower() == constants.VALUE_NONE:
6840 elif ip.lower() == constants.VALUE_AUTO:
6841 if not self.op.name_check:
6842 raise errors.OpPrereqError("IP address set to auto but name checks"
6843 " have been skipped. Aborting.",
6845 nic_ip = self.hostname1.ip
6847 if not utils.IsValidIP4(ip):
6848 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6849 " like a valid IP" % ip,
6853 # TODO: check the ip address for uniqueness
6854 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6855 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6858 # MAC address verification
6859 mac = nic.get("mac", constants.VALUE_AUTO)
6860 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6861 mac = utils.NormalizeAndValidateMac(mac)
6864 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6865 except errors.ReservationError:
6866 raise errors.OpPrereqError("MAC address %s already in use"
6867 " in cluster" % mac,
6868 errors.ECODE_NOTUNIQUE)
6870 # bridge verification
6871 bridge = nic.get("bridge", None)
6872 link = nic.get("link", None)
6874 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6875 " at the same time", errors.ECODE_INVAL)
6876 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6877 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6884 nicparams[constants.NIC_MODE] = nic_mode_req
6886 nicparams[constants.NIC_LINK] = link
6888 check_params = cluster.SimpleFillNIC(nicparams)
6889 objects.NIC.CheckParameterSyntax(check_params)
6890 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6892 # disk checks/pre-build
6894 for disk in self.op.disks:
6895 mode = disk.get("mode", constants.DISK_RDWR)
6896 if mode not in constants.DISK_ACCESS_SET:
6897 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6898 mode, errors.ECODE_INVAL)
6899 size = disk.get("size", None)
6901 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6904 except (TypeError, ValueError):
6905 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6907 new_disk = {"size": size, "mode": mode}
6909 new_disk["adopt"] = disk["adopt"]
6910 self.disks.append(new_disk)
6912 if self.op.mode == constants.INSTANCE_IMPORT:
6914 # Check that the new instance doesn't have fewer disks than the export
6915 instance_disks = len(self.disks)
6916 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6917 if instance_disks < export_disks:
6918 raise errors.OpPrereqError("Not enough disks to import."
6919 " (instance: %d, export: %d)" %
6920 (instance_disks, export_disks),
6924 for idx in range(export_disks):
6925 option = 'disk%d_dump' % idx
6926 if export_info.has_option(constants.INISECT_INS, option):
6927 # FIXME: are the old os-es, disk sizes, etc. useful?
6928 export_name = export_info.get(constants.INISECT_INS, option)
6929 image = utils.PathJoin(self.op.src_path, export_name)
6930 disk_images.append(image)
6932 disk_images.append(False)
6934 self.src_images = disk_images
6936 old_name = export_info.get(constants.INISECT_INS, 'name')
6938 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6939 except (TypeError, ValueError), err:
6940 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6941 " an integer: %s" % str(err),
6943 if self.op.instance_name == old_name:
6944 for idx, nic in enumerate(self.nics):
6945 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6946 nic_mac_ini = 'nic%d_mac' % idx
6947 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6949 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6951 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6952 if self.op.ip_check:
6953 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6954 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6955 (self.check_ip, self.op.instance_name),
6956 errors.ECODE_NOTUNIQUE)
6958 #### mac address generation
6959 # By generating the MAC address here, both the allocator and the hooks get
6960 # the real final mac address rather than the 'auto' or 'generate' value.
6961 # There is a race condition between the generation and the instance object
6962 # creation, which means that we know the mac is valid now, but we're not
6963 # sure it will be when we actually add the instance. If things go bad
6964 # adding the instance will abort because of a duplicate mac, and the
6965 # creation job will fail.
6966 for nic in self.nics:
6967 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6968 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6972 if self.op.iallocator is not None:
6973 self._RunAllocator()
6975 #### node related checks
6977 # check primary node
6978 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6979 assert self.pnode is not None, \
6980 "Cannot retrieve locked node %s" % self.op.pnode
6982 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6983 pnode.name, errors.ECODE_STATE)
6985 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6986 pnode.name, errors.ECODE_STATE)
6988 self.secondaries = []
6990 # mirror node verification
6991 if self.op.disk_template in constants.DTS_NET_MIRROR:
6992 if self.op.snode is None:
6993 raise errors.OpPrereqError("The networked disk templates need"
6994 " a mirror node", errors.ECODE_INVAL)
6995 if self.op.snode == pnode.name:
6996 raise errors.OpPrereqError("The secondary node cannot be the"
6997 " primary node.", errors.ECODE_INVAL)
6998 _CheckNodeOnline(self, self.op.snode)
6999 _CheckNodeNotDrained(self, self.op.snode)
7000 self.secondaries.append(self.op.snode)
7002 nodenames = [pnode.name] + self.secondaries
7004 req_size = _ComputeDiskSize(self.op.disk_template,
7007 # Check lv size requirements, if not adopting
7008 if req_size is not None and not self.adopt_disks:
7009 _CheckNodesFreeDisk(self, nodenames, req_size)
7011 if self.adopt_disks: # instead, we must check the adoption data
7012 all_lvs = set([i["adopt"] for i in self.disks])
7013 if len(all_lvs) != len(self.disks):
7014 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7016 for lv_name in all_lvs:
7018 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7019 except errors.ReservationError:
7020 raise errors.OpPrereqError("LV named %s used by another instance" %
7021 lv_name, errors.ECODE_NOTUNIQUE)
7023 node_lvs = self.rpc.call_lv_list([pnode.name],
7024 self.cfg.GetVGName())[pnode.name]
7025 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7026 node_lvs = node_lvs.payload
7027 delta = all_lvs.difference(node_lvs.keys())
7029 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7030 utils.CommaJoin(delta),
7032 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7034 raise errors.OpPrereqError("Online logical volumes found, cannot"
7035 " adopt: %s" % utils.CommaJoin(online_lvs),
7037 # update the size of disk based on what is found
7038 for dsk in self.disks:
7039 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7041 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7043 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7044 # check OS parameters (remotely)
7045 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7047 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7049 # memory check on primary node
7051 _CheckNodeFreeMemory(self, self.pnode.name,
7052 "creating instance %s" % self.op.instance_name,
7053 self.be_full[constants.BE_MEMORY],
7056 self.dry_run_result = list(nodenames)
7058 def Exec(self, feedback_fn):
7059 """Create and add the instance to the cluster.
7062 instance = self.op.instance_name
7063 pnode_name = self.pnode.name
7065 ht_kind = self.op.hypervisor
7066 if ht_kind in constants.HTS_REQ_PORT:
7067 network_port = self.cfg.AllocatePort()
7071 if constants.ENABLE_FILE_STORAGE:
7072 # this is needed because os.path.join does not accept None arguments
7073 if self.op.file_storage_dir is None:
7074 string_file_storage_dir = ""
7076 string_file_storage_dir = self.op.file_storage_dir
7078 # build the full file storage dir path
7079 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7080 string_file_storage_dir, instance)
7082 file_storage_dir = ""
7084 disks = _GenerateDiskTemplate(self,
7085 self.op.disk_template,
7086 instance, pnode_name,
7090 self.op.file_driver,
7093 iobj = objects.Instance(name=instance, os=self.op.os_type,
7094 primary_node=pnode_name,
7095 nics=self.nics, disks=disks,
7096 disk_template=self.op.disk_template,
7098 network_port=network_port,
7099 beparams=self.op.beparams,
7100 hvparams=self.op.hvparams,
7101 hypervisor=self.op.hypervisor,
7102 osparams=self.op.osparams,
7105 if self.adopt_disks:
7106 # rename LVs to the newly-generated names; we need to construct
7107 # 'fake' LV disks with the old data, plus the new unique_id
7108 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7110 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7111 rename_to.append(t_dsk.logical_id)
7112 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7113 self.cfg.SetDiskID(t_dsk, pnode_name)
7114 result = self.rpc.call_blockdev_rename(pnode_name,
7115 zip(tmp_disks, rename_to))
7116 result.Raise("Failed to rename adoped LVs")
7118 feedback_fn("* creating instance disks...")
7120 _CreateDisks(self, iobj)
7121 except errors.OpExecError:
7122 self.LogWarning("Device creation failed, reverting...")
7124 _RemoveDisks(self, iobj)
7126 self.cfg.ReleaseDRBDMinors(instance)
7129 feedback_fn("adding instance %s to cluster config" % instance)
7131 self.cfg.AddInstance(iobj, self.proc.GetECId())
7133 # Declare that we don't want to remove the instance lock anymore, as we've
7134 # added the instance to the config
7135 del self.remove_locks[locking.LEVEL_INSTANCE]
7136 # Unlock all the nodes
7137 if self.op.mode == constants.INSTANCE_IMPORT:
7138 nodes_keep = [self.op.src_node]
7139 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7140 if node != self.op.src_node]
7141 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7142 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7144 self.context.glm.release(locking.LEVEL_NODE)
7145 del self.acquired_locks[locking.LEVEL_NODE]
7147 if self.op.wait_for_sync:
7148 disk_abort = not _WaitForSync(self, iobj)
7149 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7150 # make sure the disks are not degraded (still sync-ing is ok)
7152 feedback_fn("* checking mirrors status")
7153 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7158 _RemoveDisks(self, iobj)
7159 self.cfg.RemoveInstance(iobj.name)
7160 # Make sure the instance lock gets removed
7161 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7162 raise errors.OpExecError("There are some degraded disks for"
7165 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7166 if self.op.mode == constants.INSTANCE_CREATE:
7167 if not self.op.no_install:
7168 feedback_fn("* running the instance OS create scripts...")
7169 # FIXME: pass debug option from opcode to backend
7170 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7171 self.op.debug_level)
7172 result.Raise("Could not add os for instance %s"
7173 " on node %s" % (instance, pnode_name))
7175 elif self.op.mode == constants.INSTANCE_IMPORT:
7176 feedback_fn("* running the instance OS import scripts...")
7180 for idx, image in enumerate(self.src_images):
7184 # FIXME: pass debug option from opcode to backend
7185 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7186 constants.IEIO_FILE, (image, ),
7187 constants.IEIO_SCRIPT,
7188 (iobj.disks[idx], idx),
7190 transfers.append(dt)
7193 masterd.instance.TransferInstanceData(self, feedback_fn,
7194 self.op.src_node, pnode_name,
7195 self.pnode.secondary_ip,
7197 if not compat.all(import_result):
7198 self.LogWarning("Some disks for instance %s on node %s were not"
7199 " imported successfully" % (instance, pnode_name))
7201 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7202 feedback_fn("* preparing remote import...")
7203 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7204 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7206 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7207 self.source_x509_ca,
7208 self._cds, timeouts)
7209 if not compat.all(disk_results):
7210 # TODO: Should the instance still be started, even if some disks
7211 # failed to import (valid for local imports, too)?
7212 self.LogWarning("Some disks for instance %s on node %s were not"
7213 " imported successfully" % (instance, pnode_name))
7215 # Run rename script on newly imported instance
7216 assert iobj.name == instance
7217 feedback_fn("Running rename script for %s" % instance)
7218 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7219 self.source_instance_name,
7220 self.op.debug_level)
7222 self.LogWarning("Failed to run rename script for %s on node"
7223 " %s: %s" % (instance, pnode_name, result.fail_msg))
7226 # also checked in the prereq part
7227 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7231 iobj.admin_up = True
7232 self.cfg.Update(iobj, feedback_fn)
7233 logging.info("Starting instance %s on node %s", instance, pnode_name)
7234 feedback_fn("* starting instance...")
7235 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7236 result.Raise("Could not start instance")
7238 return list(iobj.all_nodes)
7241 class LUConnectConsole(NoHooksLU):
7242 """Connect to an instance's console.
7244 This is somewhat special in that it returns the command line that
7245 you need to run on the master node in order to connect to the console.
7249 _OP_REQP = [("instance_name", _TNonEmptyString)]
7252 def ExpandNames(self):
7253 self._ExpandAndLockInstance()
7255 def CheckPrereq(self):
7256 """Check prerequisites.
7258 This checks that the instance is in the cluster.
7261 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7262 assert self.instance is not None, \
7263 "Cannot retrieve locked instance %s" % self.op.instance_name
7264 _CheckNodeOnline(self, self.instance.primary_node)
7266 def Exec(self, feedback_fn):
7267 """Connect to the console of an instance
7270 instance = self.instance
7271 node = instance.primary_node
7273 node_insts = self.rpc.call_instance_list([node],
7274 [instance.hypervisor])[node]
7275 node_insts.Raise("Can't get node information from %s" % node)
7277 if instance.name not in node_insts.payload:
7278 raise errors.OpExecError("Instance %s is not running." % instance.name)
7280 logging.debug("Connecting to console of %s on %s", instance.name, node)
7282 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7283 cluster = self.cfg.GetClusterInfo()
7284 # beparams and hvparams are passed separately, to avoid editing the
7285 # instance and then saving the defaults in the instance itself.
7286 hvparams = cluster.FillHV(instance)
7287 beparams = cluster.FillBE(instance)
7288 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7291 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7294 class LUReplaceDisks(LogicalUnit):
7295 """Replace the disks of an instance.
7298 HPATH = "mirrors-replace"
7299 HTYPE = constants.HTYPE_INSTANCE
7301 ("instance_name", _TNonEmptyString),
7302 ("mode", _TElemOf(constants.REPLACE_MODES)),
7303 ("disks", _TListOf(_TPositiveInt)),
7306 ("remote_node", None),
7307 ("iallocator", None),
7308 ("early_release", None),
7312 def CheckArguments(self):
7313 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7316 def ExpandNames(self):
7317 self._ExpandAndLockInstance()
7319 if self.op.iallocator is not None:
7320 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7322 elif self.op.remote_node is not None:
7323 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7324 self.op.remote_node = remote_node
7326 # Warning: do not remove the locking of the new secondary here
7327 # unless DRBD8.AddChildren is changed to work in parallel;
7328 # currently it doesn't since parallel invocations of
7329 # FindUnusedMinor will conflict
7330 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7331 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7334 self.needed_locks[locking.LEVEL_NODE] = []
7335 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7337 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7338 self.op.iallocator, self.op.remote_node,
7339 self.op.disks, False, self.op.early_release)
7341 self.tasklets = [self.replacer]
7343 def DeclareLocks(self, level):
7344 # If we're not already locking all nodes in the set we have to declare the
7345 # instance's primary/secondary nodes.
7346 if (level == locking.LEVEL_NODE and
7347 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7348 self._LockInstancesNodes()
7350 def BuildHooksEnv(self):
7353 This runs on the master, the primary and all the secondaries.
7356 instance = self.replacer.instance
7358 "MODE": self.op.mode,
7359 "NEW_SECONDARY": self.op.remote_node,
7360 "OLD_SECONDARY": instance.secondary_nodes[0],
7362 env.update(_BuildInstanceHookEnvByObject(self, instance))
7364 self.cfg.GetMasterNode(),
7365 instance.primary_node,
7367 if self.op.remote_node is not None:
7368 nl.append(self.op.remote_node)
7372 class TLReplaceDisks(Tasklet):
7373 """Replaces disks for an instance.
7375 Note: Locking is not within the scope of this class.
7378 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7379 disks, delay_iallocator, early_release):
7380 """Initializes this class.
7383 Tasklet.__init__(self, lu)
7386 self.instance_name = instance_name
7388 self.iallocator_name = iallocator_name
7389 self.remote_node = remote_node
7391 self.delay_iallocator = delay_iallocator
7392 self.early_release = early_release
7395 self.instance = None
7396 self.new_node = None
7397 self.target_node = None
7398 self.other_node = None
7399 self.remote_node_info = None
7400 self.node_secondary_ip = None
7403 def CheckArguments(mode, remote_node, iallocator):
7404 """Helper function for users of this class.
7407 # check for valid parameter combination
7408 if mode == constants.REPLACE_DISK_CHG:
7409 if remote_node is None and iallocator is None:
7410 raise errors.OpPrereqError("When changing the secondary either an"
7411 " iallocator script must be used or the"
7412 " new node given", errors.ECODE_INVAL)
7414 if remote_node is not None and iallocator is not None:
7415 raise errors.OpPrereqError("Give either the iallocator or the new"
7416 " secondary, not both", errors.ECODE_INVAL)
7418 elif remote_node is not None or iallocator is not None:
7419 # Not replacing the secondary
7420 raise errors.OpPrereqError("The iallocator and new node options can"
7421 " only be used when changing the"
7422 " secondary node", errors.ECODE_INVAL)
7425 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7426 """Compute a new secondary node using an IAllocator.
7429 ial = IAllocator(lu.cfg, lu.rpc,
7430 mode=constants.IALLOCATOR_MODE_RELOC,
7432 relocate_from=relocate_from)
7434 ial.Run(iallocator_name)
7437 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7438 " %s" % (iallocator_name, ial.info),
7441 if len(ial.result) != ial.required_nodes:
7442 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7443 " of nodes (%s), required %s" %
7445 len(ial.result), ial.required_nodes),
7448 remote_node_name = ial.result[0]
7450 lu.LogInfo("Selected new secondary for instance '%s': %s",
7451 instance_name, remote_node_name)
7453 return remote_node_name
7455 def _FindFaultyDisks(self, node_name):
7456 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7459 def CheckPrereq(self):
7460 """Check prerequisites.
7462 This checks that the instance is in the cluster.
7465 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7466 assert instance is not None, \
7467 "Cannot retrieve locked instance %s" % self.instance_name
7469 if instance.disk_template != constants.DT_DRBD8:
7470 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7471 " instances", errors.ECODE_INVAL)
7473 if len(instance.secondary_nodes) != 1:
7474 raise errors.OpPrereqError("The instance has a strange layout,"
7475 " expected one secondary but found %d" %
7476 len(instance.secondary_nodes),
7479 if not self.delay_iallocator:
7480 self._CheckPrereq2()
7482 def _CheckPrereq2(self):
7483 """Check prerequisites, second part.
7485 This function should always be part of CheckPrereq. It was separated and is
7486 now called from Exec because during node evacuation iallocator was only
7487 called with an unmodified cluster model, not taking planned changes into account.
7491 instance = self.instance
7492 secondary_node = instance.secondary_nodes[0]
7494 if self.iallocator_name is None:
7495 remote_node = self.remote_node
7497 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7498 instance.name, instance.secondary_nodes)
7500 if remote_node is not None:
7501 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7502 assert self.remote_node_info is not None, \
7503 "Cannot retrieve locked node %s" % remote_node
7505 self.remote_node_info = None
7507 if remote_node == self.instance.primary_node:
7508 raise errors.OpPrereqError("The specified node is the primary node of"
7509 " the instance.", errors.ECODE_INVAL)
7511 if remote_node == secondary_node:
7512 raise errors.OpPrereqError("The specified node is already the"
7513 " secondary node of the instance.",
7516 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7517 constants.REPLACE_DISK_CHG):
7518 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7521 if self.mode == constants.REPLACE_DISK_AUTO:
7522 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7523 faulty_secondary = self._FindFaultyDisks(secondary_node)
7525 if faulty_primary and faulty_secondary:
7526 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7527 " one node and can not be repaired"
7528 " automatically" % self.instance_name,
7532 self.disks = faulty_primary
7533 self.target_node = instance.primary_node
7534 self.other_node = secondary_node
7535 check_nodes = [self.target_node, self.other_node]
7536 elif faulty_secondary:
7537 self.disks = faulty_secondary
7538 self.target_node = secondary_node
7539 self.other_node = instance.primary_node
7540 check_nodes = [self.target_node, self.other_node]
7546 # Non-automatic modes
7547 if self.mode == constants.REPLACE_DISK_PRI:
7548 self.target_node = instance.primary_node
7549 self.other_node = secondary_node
7550 check_nodes = [self.target_node, self.other_node]
7552 elif self.mode == constants.REPLACE_DISK_SEC:
7553 self.target_node = secondary_node
7554 self.other_node = instance.primary_node
7555 check_nodes = [self.target_node, self.other_node]
7557 elif self.mode == constants.REPLACE_DISK_CHG:
7558 self.new_node = remote_node
7559 self.other_node = instance.primary_node
7560 self.target_node = secondary_node
7561 check_nodes = [self.new_node, self.other_node]
7563 _CheckNodeNotDrained(self.lu, remote_node)
7565 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7566 assert old_node_info is not None
7567 if old_node_info.offline and not self.early_release:
7568 # doesn't make sense to delay the release
7569 self.early_release = True
7570 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7571 " early-release mode", secondary_node)
7574 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7577 # If not specified all disks should be replaced
7579 self.disks = range(len(self.instance.disks))
7581 for node in check_nodes:
7582 _CheckNodeOnline(self.lu, node)
7584 # Check whether disks are valid
7585 for disk_idx in self.disks:
7586 instance.FindDisk(disk_idx)
7588 # Get secondary node IP addresses
7591 for node_name in [self.target_node, self.other_node, self.new_node]:
7592 if node_name is not None:
7593 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7595 self.node_secondary_ip = node_2nd_ip
7597 def Exec(self, feedback_fn):
7598 """Execute disk replacement.
7600 This dispatches the disk replacement to the appropriate handler.
7603 if self.delay_iallocator:
7604 self._CheckPrereq2()
7607 feedback_fn("No disks need replacement")
7610 feedback_fn("Replacing disk(s) %s for %s" %
7611 (utils.CommaJoin(self.disks), self.instance.name))
7613 activate_disks = (not self.instance.admin_up)
7615 # Activate the instance disks if we're replacing them on a down instance
7617 _StartInstanceDisks(self.lu, self.instance, True)
7620 # Should we replace the secondary node?
7621 if self.new_node is not None:
7622 fn = self._ExecDrbd8Secondary
7624 fn = self._ExecDrbd8DiskOnly
7626 return fn(feedback_fn)
7629 # Deactivate the instance disks if we're replacing them on a
7632 _SafeShutdownInstanceDisks(self.lu, self.instance)
7634 def _CheckVolumeGroup(self, nodes):
7635 self.lu.LogInfo("Checking volume groups")
7637 vgname = self.cfg.GetVGName()
7639 # Make sure volume group exists on all involved nodes
7640 results = self.rpc.call_vg_list(nodes)
7642 raise errors.OpExecError("Can't list volume groups on the nodes")
7646 res.Raise("Error checking node %s" % node)
7647 if vgname not in res.payload:
7648 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7651 def _CheckDisksExistence(self, nodes):
7652 # Check disk existence
7653 for idx, dev in enumerate(self.instance.disks):
7654 if idx not in self.disks:
7658 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7659 self.cfg.SetDiskID(dev, node)
7661 result = self.rpc.call_blockdev_find(node, dev)
7663 msg = result.fail_msg
7664 if msg or not result.payload:
7666 msg = "disk not found"
7667 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7670 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7671 for idx, dev in enumerate(self.instance.disks):
7672 if idx not in self.disks:
7675 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7678 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7680 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7681 " replace disks for instance %s" %
7682 (node_name, self.instance.name))
7684 def _CreateNewStorage(self, node_name):
7685 vgname = self.cfg.GetVGName()
7688 for idx, dev in enumerate(self.instance.disks):
7689 if idx not in self.disks:
7692 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7694 self.cfg.SetDiskID(dev, node_name)
7696 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
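# illustrative: for idx == 0 this yields [".disk0_data", ".disk0_meta"];
# _GenerateUniqueNames is expected to prefix each with a freshly generated
# unique ID before the LVs are defined below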
7697 names = _GenerateUniqueNames(self.lu, lv_names)
7699 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7700 logical_id=(vgname, names[0]))
7701 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7702 logical_id=(vgname, names[1]))
7704 new_lvs = [lv_data, lv_meta]
7705 old_lvs = dev.children
7706 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
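# iv_names now maps the device name (e.g. "disk/0") to a tuple of
# (drbd device, its current LV children, the LVs just defined above);
# _CheckDevices and _RemoveOldStorage later consume this mapping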
7708 # we pass force_create=True to force the LVM creation
7709 for new_lv in new_lvs:
7710 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7711 _GetInstanceInfoText(self.instance), False)
7715 def _CheckDevices(self, node_name, iv_names):
7716 for name, (dev, _, _) in iv_names.iteritems():
7717 self.cfg.SetDiskID(dev, node_name)
7719 result = self.rpc.call_blockdev_find(node_name, dev)
7721 msg = result.fail_msg
7722 if msg or not result.payload:
7724 msg = "disk not found"
7725 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7728 if result.payload.is_degraded:
7729 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7731 def _RemoveOldStorage(self, node_name, iv_names):
7732 for name, (_, old_lvs, _) in iv_names.iteritems():
7733 self.lu.LogInfo("Remove logical volumes for %s" % name)
7736 self.cfg.SetDiskID(lv, node_name)
7738 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7740 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7741 hint="remove unused LVs manually")
7743 def _ReleaseNodeLock(self, node_name):
7744 """Releases the lock for a given node."""
7745 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7747 def _ExecDrbd8DiskOnly(self, feedback_fn):
7748 """Replace a disk on the primary or secondary for DRBD 8.
7750 The algorithm for replace is quite complicated:
7752 1. for each disk to be replaced:
7754 1. create new LVs on the target node with unique names
7755 1. detach old LVs from the drbd device
7756 1. rename old LVs to name_replaced.<time_t>
7757 1. rename new LVs to old LVs
7758 1. attach the new LVs (with the old names now) to the drbd device
7760 1. wait for sync across all devices
7762 1. for each modified disk:
7764 1. remove old LVs (which have the name name_replaced.<time_t>)
7766 Failures are not very well handled.
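As an illustrative walk-through (names hypothetical): for disk/0 the new
LVs are created under fresh unique names, the drbd device drops its
current LV children, those old LVs are renamed to <name>_replaced-<time_t>,
the new LVs are renamed onto the old names and re-attached, and once the
device has resynced the *_replaced LVs are deleted.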
7771 # Step: check device activation
7772 self.lu.LogStep(1, steps_total, "Check device existence")
7773 self._CheckDisksExistence([self.other_node, self.target_node])
7774 self._CheckVolumeGroup([self.target_node, self.other_node])
7776 # Step: check other node consistency
7777 self.lu.LogStep(2, steps_total, "Check peer consistency")
7778 self._CheckDisksConsistency(self.other_node,
7779 self.other_node == self.instance.primary_node,
7782 # Step: create new storage
7783 self.lu.LogStep(3, steps_total, "Allocate new storage")
7784 iv_names = self._CreateNewStorage(self.target_node)
7786 # Step: for each lv, detach+rename*2+attach
7787 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7788 for dev, old_lvs, new_lvs in iv_names.itervalues():
7789 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7791 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7793 result.Raise("Can't detach drbd from local storage on node"
7794 " %s for device %s" % (self.target_node, dev.iv_name))
7796 #cfg.Update(instance)
7798 # ok, we created the new LVs, so now we know we have the needed
7799 # storage; as such, we proceed on the target node to rename
7800 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7801 # using the assumption that logical_id == physical_id (which in
7802 # turn is the unique_id on that node)
7804 # FIXME(iustin): use a better name for the replaced LVs
7805 temp_suffix = int(time.time())
7806 ren_fn = lambda d, suff: (d.physical_id[0],
7807 d.physical_id[1] + "_replaced-%s" % suff)
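# illustrative: a physical_id of ("xenvg", "disk0_data") becomes
# ("xenvg", "disk0_data_replaced-<time_t>"); the names are hypothetical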
7809 # Build the rename list based on what LVs exist on the node
7810 rename_old_to_new = []
7811 for to_ren in old_lvs:
7812 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7813 if not result.fail_msg and result.payload:
7815 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7817 self.lu.LogInfo("Renaming the old LVs on the target node")
7818 result = self.rpc.call_blockdev_rename(self.target_node,
7820 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7822 # Now we rename the new LVs to the old LVs
7823 self.lu.LogInfo("Renaming the new LVs on the target node")
7824 rename_new_to_old = [(new, old.physical_id)
7825 for old, new in zip(old_lvs, new_lvs)]
7826 result = self.rpc.call_blockdev_rename(self.target_node,
7828 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7830 for old, new in zip(old_lvs, new_lvs):
7831 new.logical_id = old.logical_id
7832 self.cfg.SetDiskID(new, self.target_node)
7834 for disk in old_lvs:
7835 disk.logical_id = ren_fn(disk, temp_suffix)
7836 self.cfg.SetDiskID(disk, self.target_node)
7838 # Now that the new lvs have the old name, we can add them to the device
7839 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7840 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7842 msg = result.fail_msg
7844 for new_lv in new_lvs:
7845 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7848 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7849 hint=("cleanup manually the unused logical"
7851 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7853 dev.children = new_lvs
7855 self.cfg.Update(self.instance, feedback_fn)
7858 if self.early_release:
7859 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7861 self._RemoveOldStorage(self.target_node, iv_names)
7862 # WARNING: we release both node locks here, do not do other RPCs
7863 # than WaitForSync to the primary node
7864 self._ReleaseNodeLock([self.target_node, self.other_node])
7867 # This can fail as the old devices are degraded and _WaitForSync
7868 # does a combined result over all disks, so we don't check its return value
7869 self.lu.LogStep(cstep, steps_total, "Sync devices")
7871 _WaitForSync(self.lu, self.instance)
7873 # Check all devices manually
7874 self._CheckDevices(self.instance.primary_node, iv_names)
7876 # Step: remove old storage
7877 if not self.early_release:
7878 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7880 self._RemoveOldStorage(self.target_node, iv_names)
7882 def _ExecDrbd8Secondary(self, feedback_fn):
7883 """Replace the secondary node for DRBD 8.
7885 The algorithm for replace is quite complicated:
7886 - for all disks of the instance:
7887 - create new LVs on the new node with same names
7888 - shutdown the drbd device on the old secondary
7889 - disconnect the drbd network on the primary
7890 - create the drbd device on the new secondary
7891 - network attach the drbd on the primary, using an artifice:
7892 the drbd code for Attach() will connect to the network if it
7893 finds a device which is connected to the good local disks but not network enabled
7895 - wait for sync across all devices
7896 - remove all disks from the old secondary
7898 Failures are not very well handled.
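For example (hypothetical nodes): with primary A and old secondary B,
choosing new node C creates the volumes and drbd devices on C, detaches
the drbds on A from the network, points them at C, and finally removes
the now-unused volumes on B.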
7903 # Step: check device activation
7904 self.lu.LogStep(1, steps_total, "Check device existence")
7905 self._CheckDisksExistence([self.instance.primary_node])
7906 self._CheckVolumeGroup([self.instance.primary_node])
7908 # Step: check other node consistency
7909 self.lu.LogStep(2, steps_total, "Check peer consistency")
7910 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7912 # Step: create new storage
7913 self.lu.LogStep(3, steps_total, "Allocate new storage")
7914 for idx, dev in enumerate(self.instance.disks):
7915 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7916 (self.new_node, idx))
7917 # we pass force_create=True to force LVM creation
7918 for new_lv in dev.children:
7919 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7920 _GetInstanceInfoText(self.instance), False)
7922 # Step 4: drbd minors and drbd setup changes
7923 # after this, we must manually remove the drbd minors on both the
7924 # error and the success paths
7925 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7926 minors = self.cfg.AllocateDRBDMinor([self.new_node
7927 for dev in self.instance.disks],
7929 logging.debug("Allocated minors %r", minors)
7932 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7933 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7934 (self.new_node, idx))
7935 # create new devices on new_node; note that we create two IDs:
7936 # one without port, so the drbd will be activated without
7937 # networking information on the new node at this stage, and one
7938 # with network, for the later activation in step 4
7939 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7940 if self.instance.primary_node == o_node1:
7943 assert self.instance.primary_node == o_node2, "Three-node instance?"
7946 new_alone_id = (self.instance.primary_node, self.new_node, None,
7947 p_minor, new_minor, o_secret)
7948 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7949 p_minor, new_minor, o_secret)
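# note on layout: a DRBD8 logical_id, as unpacked above, is the tuple
# (node_a, node_b, port, minor_a, minor_b, secret); new_alone_id carries
# port=None so the device is first brought up without networking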
7951 iv_names[idx] = (dev, dev.children, new_net_id)
7952 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7954 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7955 logical_id=new_alone_id,
7956 children=dev.children,
7959 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7960 _GetInstanceInfoText(self.instance), False)
7961 except errors.GenericError:
7962 self.cfg.ReleaseDRBDMinors(self.instance.name)
7965 # We have new devices, shutdown the drbd on the old secondary
7966 for idx, dev in enumerate(self.instance.disks):
7967 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7968 self.cfg.SetDiskID(dev, self.target_node)
7969 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7971 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7972 "node: %s" % (idx, msg),
7973 hint=("Please cleanup this device manually as"
7974 " soon as possible"))
7976 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7977 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7978 self.node_secondary_ip,
7979 self.instance.disks)\
7980 [self.instance.primary_node]
7982 msg = result.fail_msg
7984 # detaches didn't succeed (unlikely)
7985 self.cfg.ReleaseDRBDMinors(self.instance.name)
7986 raise errors.OpExecError("Can't detach the disks from the network on"
7987 " old node: %s" % (msg,))
7989 # if we managed to detach at least one, we update all the disks of
7990 # the instance to point to the new secondary
7991 self.lu.LogInfo("Updating instance configuration")
7992 for dev, _, new_logical_id in iv_names.itervalues():
7993 dev.logical_id = new_logical_id
7994 self.cfg.SetDiskID(dev, self.instance.primary_node)
7996 self.cfg.Update(self.instance, feedback_fn)
7998 # and now perform the drbd attach
7999 self.lu.LogInfo("Attaching primary drbds to new secondary"
8000 " (standalone => connected)")
8001 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8003 self.node_secondary_ip,
8004 self.instance.disks,
8007 for to_node, to_result in result.items():
8008 msg = to_result.fail_msg
8010 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8012 hint=("please do a gnt-instance info to see the"
8013 " status of disks"))
8015 if self.early_release:
8016 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8018 self._RemoveOldStorage(self.target_node, iv_names)
8019 # WARNING: we release all node locks here, do not do other RPCs
8020 # than WaitForSync to the primary node
8021 self._ReleaseNodeLock([self.instance.primary_node,
8026 # This can fail as the old devices are degraded and _WaitForSync
8027 # does a combined result over all disks, so we don't check its return value
8028 self.lu.LogStep(cstep, steps_total, "Sync devices")
8030 _WaitForSync(self.lu, self.instance)
8032 # Check all devices manually
8033 self._CheckDevices(self.instance.primary_node, iv_names)
8035 # Step: remove old storage
8036 if not self.early_release:
8037 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8038 self._RemoveOldStorage(self.target_node, iv_names)
8041 class LURepairNodeStorage(NoHooksLU):
8042 """Repairs the volume group on a node.
8045 _OP_REQP = [("node_name", _TNonEmptyString)]
8048 def CheckArguments(self):
8049 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8051 _CheckStorageType(self.op.storage_type)
8053 storage_type = self.op.storage_type
8055 if (constants.SO_FIX_CONSISTENCY not in
8056 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8057 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8058 " repaired" % storage_type,
8061 def ExpandNames(self):
8062 self.needed_locks = {
8063 locking.LEVEL_NODE: [self.op.node_name],
8066 def _CheckFaultyDisks(self, instance, node_name):
8067 """Ensure faulty disks abort the opcode or at least warn."""
8069 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8071 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8072 " node '%s'" % (instance.name, node_name),
8074 except errors.OpPrereqError, err:
8075 if self.op.ignore_consistency:
8076 self.proc.LogWarning(str(err.args[0]))
8080 def CheckPrereq(self):
8081 """Check prerequisites.
8084 # Check whether any instance on this node has faulty disks
8085 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8086 if not inst.admin_up:
8088 check_nodes = set(inst.all_nodes)
8089 check_nodes.discard(self.op.node_name)
8090 for inst_node_name in check_nodes:
8091 self._CheckFaultyDisks(inst, inst_node_name)
8093 def Exec(self, feedback_fn):
8094 feedback_fn("Repairing storage unit '%s' on %s ..." %
8095 (self.op.name, self.op.node_name))
8097 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8098 result = self.rpc.call_storage_execute(self.op.node_name,
8099 self.op.storage_type, st_args,
8101 constants.SO_FIX_CONSISTENCY)
8102 result.Raise("Failed to repair storage unit '%s' on %s" %
8103 (self.op.name, self.op.node_name))
8106 class LUNodeEvacuationStrategy(NoHooksLU):
8107 """Computes the node evacuation strategy.
8110 _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
8112 ("remote_node", None),
8113 ("iallocator", None),
8117 def CheckArguments(self):
8118 if self.op.remote_node is not None and self.op.iallocator is not None:
8119 raise errors.OpPrereqError("Give either the iallocator or the new"
8120 " secondary, not both", errors.ECODE_INVAL)
8122 def ExpandNames(self):
8123 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8124 self.needed_locks = locks = {}
8125 if self.op.remote_node is None:
8126 locks[locking.LEVEL_NODE] = locking.ALL_SET
8128 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8129 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8131 def Exec(self, feedback_fn):
8132 if self.op.remote_node is not None:
8134 for node in self.op.nodes:
8135 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8138 if i.primary_node == self.op.remote_node:
8139 raise errors.OpPrereqError("Node %s is the primary node of"
8140 " instance %s, cannot use it as"
8142 (self.op.remote_node, i.name),
8144 result.append([i.name, self.op.remote_node])
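# each entry pairs an instance with its new secondary, e.g.
# ["inst1.example.com", "node3.example.com"] (illustrative names)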
8146 ial = IAllocator(self.cfg, self.rpc,
8147 mode=constants.IALLOCATOR_MODE_MEVAC,
8148 evac_nodes=self.op.nodes)
8149 ial.Run(self.op.iallocator, validate=True)
8151 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8157 class LUGrowDisk(LogicalUnit):
8158 """Grow a disk of an instance.
8162 HTYPE = constants.HTYPE_INSTANCE
8164 ("instance_name", _TNonEmptyString),
8167 ("wait_for_sync", _TBool),
8171 def ExpandNames(self):
8172 self._ExpandAndLockInstance()
8173 self.needed_locks[locking.LEVEL_NODE] = []
8174 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8176 def DeclareLocks(self, level):
8177 if level == locking.LEVEL_NODE:
8178 self._LockInstancesNodes()
8180 def BuildHooksEnv(self):
8183 This runs on the master, the primary and all the secondaries.
8187 "DISK": self.op.disk,
8188 "AMOUNT": self.op.amount,
8190 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8191 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8194 def CheckPrereq(self):
8195 """Check prerequisites.
8197 This checks that the instance is in the cluster.
8200 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8201 assert instance is not None, \
8202 "Cannot retrieve locked instance %s" % self.op.instance_name
8203 nodenames = list(instance.all_nodes)
8204 for node in nodenames:
8205 _CheckNodeOnline(self, node)
8207 self.instance = instance
8209 if instance.disk_template not in constants.DTS_GROWABLE:
8210 raise errors.OpPrereqError("Instance's disk layout does not support"
8211 " growing.", errors.ECODE_INVAL)
8213 self.disk = instance.FindDisk(self.op.disk)
8215 if instance.disk_template != constants.DT_FILE:
8216 # TODO: check the free disk space for file, when that feature will be implemented
8218 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8220 def Exec(self, feedback_fn):
8221 """Execute disk grow.
8224 instance = self.instance
8227 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8229 raise errors.OpExecError("Cannot activate block device to grow")
8231 for node in instance.all_nodes:
8232 self.cfg.SetDiskID(disk, node)
8233 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8234 result.Raise("Grow request failed to node %s" % node)
8236 # TODO: Rewrite code to work properly
8237 # DRBD goes into sync mode for a short amount of time after executing the
8238 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8239 # calling "resize" in sync mode fails. Sleeping for a short amount of
8240 # time is a work-around.
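# (a short pause, e.g. time.sleep(5), serves as that work-around; the
# exact delay shown here is illustrative)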
8243 disk.RecordGrow(self.op.amount)
8244 self.cfg.Update(instance, feedback_fn)
8245 if self.op.wait_for_sync:
8246 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8248 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8249 " status.\nPlease check the instance.")
8250 if not instance.admin_up:
8251 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8252 elif not instance.admin_up:
8253 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8254 " not supposed to be running because no wait for"
8255 " sync mode was requested.")
8258 class LUQueryInstanceData(NoHooksLU):
8259 """Query runtime instance data.
8263 ("instances", _TListOf(_TNonEmptyString)),
8268 def ExpandNames(self):
8269 self.needed_locks = {}
8270 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8272 if self.op.instances:
8273 self.wanted_names = []
8274 for name in self.op.instances:
8275 full_name = _ExpandInstanceName(self.cfg, name)
8276 self.wanted_names.append(full_name)
8277 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8279 self.wanted_names = None
8280 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8282 self.needed_locks[locking.LEVEL_NODE] = []
8283 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8285 def DeclareLocks(self, level):
8286 if level == locking.LEVEL_NODE:
8287 self._LockInstancesNodes()
8289 def CheckPrereq(self):
8290 """Check prerequisites.
8292 This only checks the optional instance list against the existing names.
8295 if self.wanted_names is None:
8296 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8298 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8299 in self.wanted_names]
8301 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8302 """Returns the status of a block device
8305 if self.op.static or not node:
8308 self.cfg.SetDiskID(dev, node)
8310 result = self.rpc.call_blockdev_find(node, dev)
8314 result.Raise("Can't compute disk status for %s" % instance_name)
8316 status = result.payload
8320 return (status.dev_path, status.major, status.minor,
8321 status.sync_percent, status.estimated_time,
8322 status.is_degraded, status.ldisk_status)
8324 def _ComputeDiskStatus(self, instance, snode, dev):
8325 """Compute block device status.
8328 if dev.dev_type in constants.LDS_DRBD:
8329 # we change the snode then (otherwise we use the one passed in)
8330 if dev.logical_id[0] == instance.primary_node:
8331 snode = dev.logical_id[1]
8333 snode = dev.logical_id[0]
8335 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8337 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8340 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8341 for child in dev.children]
8346 "iv_name": dev.iv_name,
8347 "dev_type": dev.dev_type,
8348 "logical_id": dev.logical_id,
8349 "physical_id": dev.physical_id,
8350 "pstatus": dev_pstatus,
8351 "sstatus": dev_sstatus,
8352 "children": dev_children,
8359 def Exec(self, feedback_fn):
8360 """Gather and return data"""
8363 cluster = self.cfg.GetClusterInfo()
8365 for instance in self.wanted_instances:
8366 if not self.op.static:
8367 remote_info = self.rpc.call_instance_info(instance.primary_node,
8369 instance.hypervisor)
8370 remote_info.Raise("Error checking node %s" % instance.primary_node)
8371 remote_info = remote_info.payload
8372 if remote_info and "state" in remote_info:
8375 remote_state = "down"
8378 if instance.admin_up:
8381 config_state = "down"
8383 disks = [self._ComputeDiskStatus(instance, None, device)
8384 for device in instance.disks]
8387 "name": instance.name,
8388 "config_state": config_state,
8389 "run_state": remote_state,
8390 "pnode": instance.primary_node,
8391 "snodes": instance.secondary_nodes,
8393 # this happens to be the same format used for hooks
8394 "nics": _NICListToTuple(self, instance.nics),
8395 "disk_template": instance.disk_template,
8397 "hypervisor": instance.hypervisor,
8398 "network_port": instance.network_port,
8399 "hv_instance": instance.hvparams,
8400 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8401 "be_instance": instance.beparams,
8402 "be_actual": cluster.FillBE(instance),
8403 "os_instance": instance.osparams,
8404 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8405 "serial_no": instance.serial_no,
8406 "mtime": instance.mtime,
8407 "ctime": instance.ctime,
8408 "uuid": instance.uuid,
8411 result[instance.name] = idict
8416 class LUSetInstanceParams(LogicalUnit):
8417 """Modifies an instances's parameters.
8420 HPATH = "instance-modify"
8421 HTYPE = constants.HTYPE_INSTANCE
8422 _OP_REQP = [("instance_name", _TNonEmptyString)]
8424 ("nics", _EmptyList),
8425 ("disks", _EmptyList),
8426 ("beparams", _EmptyDict),
8427 ("hvparams", _EmptyDict),
8428 ("disk_template", None),
8429 ("remote_node", None),
8431 ("force_variant", False),
8437 def CheckArguments(self):
8438 if not (self.op.nics or self.op.disks or self.op.disk_template or
8439 self.op.hvparams or self.op.beparams or self.op.os_name):
8440 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8442 if self.op.hvparams:
8443 _CheckGlobalHvParams(self.op.hvparams)
8447 for disk_op, disk_dict in self.op.disks:
8448 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8449 if disk_op == constants.DDM_REMOVE:
8452 elif disk_op == constants.DDM_ADD:
8455 if not isinstance(disk_op, int):
8456 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8457 if not isinstance(disk_dict, dict):
8458 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8459 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8461 if disk_op == constants.DDM_ADD:
8462 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8463 if mode not in constants.DISK_ACCESS_SET:
8464 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8466 size = disk_dict.get('size', None)
8468 raise errors.OpPrereqError("Required disk parameter size missing",
8472 except (TypeError, ValueError), err:
8473 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8474 str(err), errors.ECODE_INVAL)
8475 disk_dict['size'] = size
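# illustrative: a well-formed "add" entry thus looks like
# (constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})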
8477 # modification of disk
8478 if 'size' in disk_dict:
8479 raise errors.OpPrereqError("Disk size change not possible, use"
8480 " grow-disk", errors.ECODE_INVAL)
8482 if disk_addremove > 1:
8483 raise errors.OpPrereqError("Only one disk add or remove operation"
8484 " supported at a time", errors.ECODE_INVAL)
8486 if self.op.disks and self.op.disk_template is not None:
8487 raise errors.OpPrereqError("Disk template conversion and other disk"
8488 " changes not supported at the same time",
8491 if self.op.disk_template:
8492 _CheckDiskTemplate(self.op.disk_template)
8493 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8494 self.op.remote_node is None):
8495 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8496 " one requires specifying a secondary node",
8501 for nic_op, nic_dict in self.op.nics:
8502 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8503 if nic_op == constants.DDM_REMOVE:
8506 elif nic_op == constants.DDM_ADD:
8509 if not isinstance(nic_op, int):
8510 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8511 if not isinstance(nic_dict, dict):
8512 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8513 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8515 # nic_dict should be a dict
8516 nic_ip = nic_dict.get('ip', None)
8517 if nic_ip is not None:
8518 if nic_ip.lower() == constants.VALUE_NONE:
8519 nic_dict['ip'] = None
8521 if not utils.IsValidIP4(nic_ip):
8522 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8525 nic_bridge = nic_dict.get('bridge', None)
8526 nic_link = nic_dict.get('link', None)
8527 if nic_bridge and nic_link:
8528 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8529 " at the same time", errors.ECODE_INVAL)
8530 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8531 nic_dict['bridge'] = None
8532 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8533 nic_dict['link'] = None
8535 if nic_op == constants.DDM_ADD:
8536 nic_mac = nic_dict.get('mac', None)
8538 nic_dict['mac'] = constants.VALUE_AUTO
8540 if 'mac' in nic_dict:
8541 nic_mac = nic_dict['mac']
8542 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8543 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8545 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8546 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8547 " modifying an existing nic",
8550 if nic_addremove > 1:
8551 raise errors.OpPrereqError("Only one NIC add or remove operation"
8552 " supported at a time", errors.ECODE_INVAL)
8554 def ExpandNames(self):
8555 self._ExpandAndLockInstance()
8556 self.needed_locks[locking.LEVEL_NODE] = []
8557 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8559 def DeclareLocks(self, level):
8560 if level == locking.LEVEL_NODE:
8561 self._LockInstancesNodes()
8562 if self.op.disk_template and self.op.remote_node:
8563 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8564 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8566 def BuildHooksEnv(self):
8569 This runs on the master, primary and secondaries.
8573 if constants.BE_MEMORY in self.be_new:
8574 args['memory'] = self.be_new[constants.BE_MEMORY]
8575 if constants.BE_VCPUS in self.be_new:
8576 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8577 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8578 # information at all.
8581 nic_override = dict(self.op.nics)
8582 for idx, nic in enumerate(self.instance.nics):
8583 if idx in nic_override:
8584 this_nic_override = nic_override[idx]
8586 this_nic_override = {}
8587 if 'ip' in this_nic_override:
8588 ip = this_nic_override['ip']
8591 if 'mac' in this_nic_override:
8592 mac = this_nic_override['mac']
8595 if idx in self.nic_pnew:
8596 nicparams = self.nic_pnew[idx]
8598 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8599 mode = nicparams[constants.NIC_MODE]
8600 link = nicparams[constants.NIC_LINK]
8601 args['nics'].append((ip, mac, mode, link))
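# each entry appended to args['nics'] is an (ip, mac, mode, link) tuple,
# matching the format expected by the instance hooks environment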
8602 if constants.DDM_ADD in nic_override:
8603 ip = nic_override[constants.DDM_ADD].get('ip', None)
8604 mac = nic_override[constants.DDM_ADD]['mac']
8605 nicparams = self.nic_pnew[constants.DDM_ADD]
8606 mode = nicparams[constants.NIC_MODE]
8607 link = nicparams[constants.NIC_LINK]
8608 args['nics'].append((ip, mac, mode, link))
8609 elif constants.DDM_REMOVE in nic_override:
8610 del args['nics'][-1]
8612 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8613 if self.op.disk_template:
8614 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8615 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8618 def CheckPrereq(self):
8619 """Check prerequisites.
8621 This only checks the instance list against the existing names.
8624 # checking the new params on the primary/secondary nodes
8626 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8627 cluster = self.cluster = self.cfg.GetClusterInfo()
8628 assert self.instance is not None, \
8629 "Cannot retrieve locked instance %s" % self.op.instance_name
8630 pnode = instance.primary_node
8631 nodelist = list(instance.all_nodes)
8634 if self.op.os_name and not self.op.force:
8635 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8636 self.op.force_variant)
8637 instance_os = self.op.os_name
8639 instance_os = instance.os
8641 if self.op.disk_template:
8642 if instance.disk_template == self.op.disk_template:
8643 raise errors.OpPrereqError("Instance already has disk template %s" %
8644 instance.disk_template, errors.ECODE_INVAL)
8646 if (instance.disk_template,
8647 self.op.disk_template) not in self._DISK_CONVERSIONS:
8648 raise errors.OpPrereqError("Unsupported disk template conversion from"
8649 " %s to %s" % (instance.disk_template,
8650 self.op.disk_template),
8652 _CheckInstanceDown(self, instance, "cannot change disk template")
8653 if self.op.disk_template in constants.DTS_NET_MIRROR:
8654 _CheckNodeOnline(self, self.op.remote_node)
8655 _CheckNodeNotDrained(self, self.op.remote_node)
8656 disks = [{"size": d.size} for d in instance.disks]
8657 required = _ComputeDiskSize(self.op.disk_template, disks)
8658 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8660 # hvparams processing
8661 if self.op.hvparams:
8662 hv_type = instance.hypervisor
8663 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8664 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8665 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8668 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8669 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8670 self.hv_new = hv_new # the new actual values
8671 self.hv_inst = i_hvdict # the new dict (without defaults)
8673 self.hv_new = self.hv_inst = {}
8675 # beparams processing
8676 if self.op.beparams:
8677 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8679 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8680 be_new = cluster.SimpleFillBE(i_bedict)
8681 self.be_new = be_new # the new actual values
8682 self.be_inst = i_bedict # the new dict (without defaults)
8684 self.be_new = self.be_inst = {}
8686 # osparams processing
8687 if self.op.osparams:
8688 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8689 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8690 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8691 self.os_inst = i_osdict # the new dict (without defaults)
8693 self.os_new = self.os_inst = {}
8697 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8698 mem_check_list = [pnode]
8699 if be_new[constants.BE_AUTO_BALANCE]:
8700 # either we changed auto_balance to yes or it was from before
8701 mem_check_list.extend(instance.secondary_nodes)
8702 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8703 instance.hypervisor)
8704 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8705 instance.hypervisor)
8706 pninfo = nodeinfo[pnode]
8707 msg = pninfo.fail_msg
8709 # Assume the primary node is unreachable and go ahead
8710 self.warn.append("Can't get info from primary node %s: %s" %
8712 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8713 self.warn.append("Node data from primary node %s doesn't contain"
8714 " free memory information" % pnode)
8715 elif instance_info.fail_msg:
8716 self.warn.append("Can't get instance runtime information: %s" %
8717 instance_info.fail_msg)
8719 if instance_info.payload:
8720 current_mem = int(instance_info.payload['memory'])
8722 # Assume instance not running
8723 # (there is a slight race condition here, but it's not very probable,
8724 # and we have no other way to check)
8726 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8727 pninfo.payload['memory_free'])
8729 raise errors.OpPrereqError("This change will prevent the instance"
8730 " from starting, due to %d MB of memory"
8731 " missing on its primary node" % miss_mem,
8734 if be_new[constants.BE_AUTO_BALANCE]:
8735 for node, nres in nodeinfo.items():
8736 if node not in instance.secondary_nodes:
8740 self.warn.append("Can't get info from secondary node %s: %s" %
8742 elif not isinstance(nres.payload.get('memory_free', None), int):
8743 self.warn.append("Secondary node %s didn't return free"
8744 " memory information" % node)
8745 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8746 self.warn.append("Not enough memory to failover instance to"
8747 " secondary node %s" % node)
8752 for nic_op, nic_dict in self.op.nics:
8753 if nic_op == constants.DDM_REMOVE:
8754 if not instance.nics:
8755 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8758 if nic_op != constants.DDM_ADD:
8760 if not instance.nics:
8761 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8762 " no NICs" % nic_op,
8764 if nic_op < 0 or nic_op >= len(instance.nics):
8765 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8767 (nic_op, len(instance.nics) - 1),
8769 old_nic_params = instance.nics[nic_op].nicparams
8770 old_nic_ip = instance.nics[nic_op].ip
8775 update_params_dict = dict([(key, nic_dict[key])
8776 for key in constants.NICS_PARAMETERS
8777 if key in nic_dict])
8779 if 'bridge' in nic_dict:
8780 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8782 new_nic_params = _GetUpdatedParams(old_nic_params,
8784 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8785 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8786 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8787 self.nic_pinst[nic_op] = new_nic_params
8788 self.nic_pnew[nic_op] = new_filled_nic_params
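# nic_pinst keeps only the instance-level values, while nic_pnew is the
# same dict filled out with cluster defaults; the filled version is what
# gets validated below and reported in the hooks environment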
8789 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8791 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8792 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8793 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8795 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8797 self.warn.append(msg)
8799 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8800 if new_nic_mode == constants.NIC_MODE_ROUTED:
8801 if 'ip' in nic_dict:
8802 nic_ip = nic_dict['ip']
8806 raise errors.OpPrereqError('Cannot set the nic ip to None'
8807 ' on a routed nic', errors.ECODE_INVAL)
8808 if 'mac' in nic_dict:
8809 nic_mac = nic_dict['mac']
8811 raise errors.OpPrereqError('Cannot set the nic mac to None',
8813 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8814 # otherwise generate the mac
8815 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8817 # or validate/reserve the current one
8819 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8820 except errors.ReservationError:
8821 raise errors.OpPrereqError("MAC address %s already in use"
8822 " in cluster" % nic_mac,
8823 errors.ECODE_NOTUNIQUE)
8826 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8827 raise errors.OpPrereqError("Disk operations not supported for"
8828 " diskless instances",
8830 for disk_op, _ in self.op.disks:
8831 if disk_op == constants.DDM_REMOVE:
8832 if len(instance.disks) == 1:
8833 raise errors.OpPrereqError("Cannot remove the last disk of"
8834 " an instance", errors.ECODE_INVAL)
8835 _CheckInstanceDown(self, instance, "cannot remove disks")
8837 if (disk_op == constants.DDM_ADD and
8838 len(instance.disks) >= constants.MAX_DISKS):
8839 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8840 " add more" % constants.MAX_DISKS,
8842 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8844 if disk_op < 0 or disk_op >= len(instance.disks):
8845 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8847 (disk_op, len(instance.disks)),
8852 def _ConvertPlainToDrbd(self, feedback_fn):
8853 """Converts an instance from plain to drbd.
8856 feedback_fn("Converting template to drbd")
8857 instance = self.instance
8858 pnode = instance.primary_node
8859 snode = self.op.remote_node
8861 # create a fake disk info for _GenerateDiskTemplate
8862 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8863 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8864 instance.name, pnode, [snode],
8865 disk_info, None, None, 0)
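# each generated disk is a DRBD8 device with two LV children:
# children[0] is the data slot (the existing LV is renamed onto it below)
# and children[1] a brand new metadata LV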
8866 info = _GetInstanceInfoText(instance)
8867 feedback_fn("Creating aditional volumes...")
8868 # first, create the missing data and meta devices
8869 for disk in new_disks:
8870 # unfortunately this is... not too nice
8871 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8873 for child in disk.children:
8874 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8875 # at this stage, all new LVs have been created, we can rename the old ones
8877 feedback_fn("Renaming original volumes...")
8878 rename_list = [(o, n.children[0].logical_id)
8879 for (o, n) in zip(instance.disks, new_disks)]
8880 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8881 result.Raise("Failed to rename original LVs")
8883 feedback_fn("Initializing DRBD devices...")
8884 # all child devices are in place, we can now create the DRBD devices
8885 for disk in new_disks:
8886 for node in [pnode, snode]:
8887 f_create = node == pnode
8888 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8890 # at this point, the instance has been modified
8891 instance.disk_template = constants.DT_DRBD8
8892 instance.disks = new_disks
8893 self.cfg.Update(instance, feedback_fn)
8895 # disks are created, waiting for sync
8896 disk_abort = not _WaitForSync(self, instance)
8898 raise errors.OpExecError("There are some degraded disks for"
8899 " this instance, please cleanup manually")
8901 def _ConvertDrbdToPlain(self, feedback_fn):
8902 """Converts an instance from drbd to plain.
8905 instance = self.instance
8906 assert len(instance.secondary_nodes) == 1
8907 pnode = instance.primary_node
8908 snode = instance.secondary_nodes[0]
8909 feedback_fn("Converting template to plain")
8911 old_disks = instance.disks
8912 new_disks = [d.children[0] for d in old_disks]
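# children[0] of each DRBD disk is its data LV on the primary node; it
# simply becomes the plain disk, while the secondary's volumes and the
# primary's metadata LVs are removed below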
8914 # copy over size and mode
8915 for parent, child in zip(old_disks, new_disks):
8916 child.size = parent.size
8917 child.mode = parent.mode
8919 # update instance structure
8920 instance.disks = new_disks
8921 instance.disk_template = constants.DT_PLAIN
8922 self.cfg.Update(instance, feedback_fn)
8924 feedback_fn("Removing volumes on the secondary node...")
8925 for disk in old_disks:
8926 self.cfg.SetDiskID(disk, snode)
8927 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8929 self.LogWarning("Could not remove block device %s on node %s,"
8930 " continuing anyway: %s", disk.iv_name, snode, msg)
8932 feedback_fn("Removing unneeded volumes on the primary node...")
8933 for idx, disk in enumerate(old_disks):
8934 meta = disk.children[1]
8935 self.cfg.SetDiskID(meta, pnode)
8936 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8938 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8939 " continuing anyway: %s", idx, pnode, msg)
8942 def Exec(self, feedback_fn):
8943 """Modifies an instance.
8945 All parameters take effect only at the next restart of the instance.
8948 # Process here the warnings from CheckPrereq, as we don't have a
8949 # feedback_fn there.
8950 for warn in self.warn:
8951 feedback_fn("WARNING: %s" % warn)
8954 instance = self.instance
8956 for disk_op, disk_dict in self.op.disks:
8957 if disk_op == constants.DDM_REMOVE:
8958 # remove the last disk
8959 device = instance.disks.pop()
8960 device_idx = len(instance.disks)
8961 for node, disk in device.ComputeNodeTree(instance.primary_node):
8962 self.cfg.SetDiskID(disk, node)
8963 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8965 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8966 " continuing anyway", device_idx, node, msg)
8967 result.append(("disk/%d" % device_idx, "remove"))
8968 elif disk_op == constants.DDM_ADD:
8970 if instance.disk_template == constants.DT_FILE:
8971 file_driver, file_path = instance.disks[0].logical_id
8972 file_path = os.path.dirname(file_path)
8974 file_driver = file_path = None
8975 disk_idx_base = len(instance.disks)
8976 new_disk = _GenerateDiskTemplate(self,
8977 instance.disk_template,
8978 instance.name, instance.primary_node,
8979 instance.secondary_nodes,
8984 instance.disks.append(new_disk)
8985 info = _GetInstanceInfoText(instance)
8987 logging.info("Creating volume %s for instance %s",
8988 new_disk.iv_name, instance.name)
8989 # Note: this needs to be kept in sync with _CreateDisks
8991 for node in instance.all_nodes:
8992 f_create = node == instance.primary_node
8994 _CreateBlockDev(self, node, instance, new_disk,
8995 f_create, info, f_create)
8996 except errors.OpExecError, err:
8997 self.LogWarning("Failed to create volume %s (%s) on"
8999 new_disk.iv_name, new_disk, node, err)
9000 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9001 (new_disk.size, new_disk.mode)))
9003 # change a given disk
9004 instance.disks[disk_op].mode = disk_dict['mode']
9005 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9007 if self.op.disk_template:
9008 r_shut = _ShutdownInstanceDisks(self, instance)
9010 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9011 " proceed with disk template conversion")
9012 mode = (instance.disk_template, self.op.disk_template)
9014 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9016 self.cfg.ReleaseDRBDMinors(instance.name)
9018 result.append(("disk_template", self.op.disk_template))
9021 for nic_op, nic_dict in self.op.nics:
9022 if nic_op == constants.DDM_REMOVE:
9023 # remove the last nic
9024 del instance.nics[-1]
9025 result.append(("nic.%d" % len(instance.nics), "remove"))
9026 elif nic_op == constants.DDM_ADD:
9027 # mac and bridge should be set by now
9028 mac = nic_dict['mac']
9029 ip = nic_dict.get('ip', None)
9030 nicparams = self.nic_pinst[constants.DDM_ADD]
9031 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9032 instance.nics.append(new_nic)
9033 result.append(("nic.%d" % (len(instance.nics) - 1),
9034 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9035 (new_nic.mac, new_nic.ip,
9036 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9037 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9040 for key in 'mac', 'ip':
9042 setattr(instance.nics[nic_op], key, nic_dict[key])
9043 if nic_op in self.nic_pinst:
9044 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9045 for key, val in nic_dict.iteritems():
9046 result.append(("nic.%s/%d" % (key, nic_op), val))
9049 if self.op.hvparams:
9050 instance.hvparams = self.hv_inst
9051 for key, val in self.op.hvparams.iteritems():
9052 result.append(("hv/%s" % key, val))
9055 if self.op.beparams:
9056 instance.beparams = self.be_inst
9057 for key, val in self.op.beparams.iteritems():
9058 result.append(("be/%s" % key, val))
9062 instance.os = self.op.os_name
9065 if self.op.osparams:
9066 instance.osparams = self.os_inst
9067 for key, val in self.op.osparams.iteritems():
9068 result.append(("os/%s" % key, val))
9070 self.cfg.Update(instance, feedback_fn)
9074 _DISK_CONVERSIONS = {
9075 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9076 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9080 class LUQueryExports(NoHooksLU):
9081 """Query the exports list
9084 _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
9087 def ExpandNames(self):
9088 self.needed_locks = {}
9089 self.share_locks[locking.LEVEL_NODE] = 1
9090 if not self.op.nodes:
9091 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9093 self.needed_locks[locking.LEVEL_NODE] = \
9094 _GetWantedNodes(self, self.op.nodes)
9096 def Exec(self, feedback_fn):
9097 """Compute the list of all the exported system images.
9100 @return: a dictionary with the structure node->(export-list)
9101 where export-list is a list of the instances exported on that node
9105 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9106 rpcresult = self.rpc.call_export_list(self.nodes)
9108 for node in rpcresult:
9109 if rpcresult[node].fail_msg:
9110 result[node] = False
9112 result[node] = rpcresult[node].payload
9117 class LUPrepareExport(NoHooksLU):
9118 """Prepares an instance for an export and returns useful information.
9122 ("instance_name", _TNonEmptyString),
9123 ("mode", _TElemOf(constants.EXPORT_MODES)),
9127 def ExpandNames(self):
9128 self._ExpandAndLockInstance()
9130 def CheckPrereq(self):
9131 """Check prerequisites.
9134 instance_name = self.op.instance_name
9136 self.instance = self.cfg.GetInstanceInfo(instance_name)
9137 assert self.instance is not None, \
9138 "Cannot retrieve locked instance %s" % self.op.instance_name
9139 _CheckNodeOnline(self, self.instance.primary_node)
9141 self._cds = _GetClusterDomainSecret()
9143 def Exec(self, feedback_fn):
9144 """Prepares an instance for an export.
9147 instance = self.instance
9149 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9150 salt = utils.GenerateSecret(8)
9152 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9153 result = self.rpc.call_x509_cert_create(instance.primary_node,
9154 constants.RIE_CERT_VALIDITY)
9155 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9157 (name, cert_pem) = result.payload
9159 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9163 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9164 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9166 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9172 class LUExportInstance(LogicalUnit):
9173 """Export an instance to an image in the cluster.
9176 HPATH = "instance-export"
9177 HTYPE = constants.HTYPE_INSTANCE
9179 ("instance_name", _TNonEmptyString),
9180 ("target_node", _TNonEmptyString),
9181 ("shutdown", _TBool),
9182 ("mode", _TElemOf(constants.EXPORT_MODES)),
9185 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
9186 ("remove_instance", False),
9187 ("ignore_remove_failures", False),
9188 ("mode", constants.EXPORT_MODE_LOCAL),
9189 ("x509_key_name", None),
9190 ("destination_x509_ca", None),
9194 def CheckArguments(self):
9195 """Check the arguments.
9198 self.x509_key_name = self.op.x509_key_name
9199 self.dest_x509_ca_pem = self.op.destination_x509_ca
9201 if self.op.remove_instance and not self.op.shutdown:
9202 raise errors.OpPrereqError("Can not remove instance without shutting it"
9205 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9206 if not self.x509_key_name:
9207 raise errors.OpPrereqError("Missing X509 key name for encryption",
9210 if not self.dest_x509_ca_pem:
9211 raise errors.OpPrereqError("Missing destination X509 CA",
9214 def ExpandNames(self):
9215 self._ExpandAndLockInstance()
9217 # Lock all nodes for local exports
9218 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9219 # FIXME: lock only instance primary and destination node
9221 # Sad but true, for now we have to lock all nodes, as we don't know where
9222 # the previous export might be, and in this LU we search for it and
9223 # remove it from its current node. In the future we could fix this by:
9224 # - making a tasklet to search (share-lock all), then create the
9225 # new one, then one to remove, after
9226 # - removing the removal operation altogether
9227 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9229 def DeclareLocks(self, level):
9230 """Last minute lock declaration."""
9231 # All nodes are locked anyway, so nothing to do here.
9233 def BuildHooksEnv(self):
9236 This will run on the master, primary node and target node.
9240 "EXPORT_MODE": self.op.mode,
9241 "EXPORT_NODE": self.op.target_node,
9242 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9243 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9244 # TODO: Generic function for boolean env variables
9245 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9248 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9250 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9252 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9253 nl.append(self.op.target_node)
9257 def CheckPrereq(self):
9258 """Check prerequisites.
9260 This checks that the instance and node names are valid.
9263 instance_name = self.op.instance_name
9265 self.instance = self.cfg.GetInstanceInfo(instance_name)
9266 assert self.instance is not None, \
9267 "Cannot retrieve locked instance %s" % self.op.instance_name
9268 _CheckNodeOnline(self, self.instance.primary_node)
9270 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9271 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9272 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9273 assert self.dst_node is not None
9275 _CheckNodeOnline(self, self.dst_node.name)
9276 _CheckNodeNotDrained(self, self.dst_node.name)
9279 self.dest_disk_info = None
9280 self.dest_x509_ca = None
9282 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9283 self.dst_node = None
9285 if len(self.op.target_node) != len(self.instance.disks):
9286 raise errors.OpPrereqError(("Received destination information for %s"
9287 " disks, but instance %s has %s disks") %
9288 (len(self.op.target_node), instance_name,
9289 len(self.instance.disks)),
9292 cds = _GetClusterDomainSecret()
9294 # Check X509 key name
9296 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9297 except (TypeError, ValueError), err:
9298 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9300 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9301 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9304 # Load and verify CA
9306 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9307 except OpenSSL.crypto.Error, err:
9308 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9309 (err, ), errors.ECODE_INVAL)
9311 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9312 if errcode is not None:
9313 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9314 (msg, ), errors.ECODE_INVAL)
9316 self.dest_x509_ca = cert
9318 # Verify target information
9320 for idx, disk_data in enumerate(self.op.target_node):
9322 (host, port, magic) = \
9323 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9324 except errors.GenericError, err:
9325 raise errors.OpPrereqError("Target info for disk %s: %s" %
9326 (idx, err), errors.ECODE_INVAL)
9328 disk_info.append((host, port, magic))
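# each remote-export target is therefore a (host, port, magic) triple,
# verified above against the cluster domain secret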
9330 assert len(disk_info) == len(self.op.target_node)
9331 self.dest_disk_info = disk_info
9334 raise errors.ProgrammerError("Unhandled export mode %r" %
9337 # instance disk type verification
9338 # TODO: Implement export support for file-based disks
9339 for disk in self.instance.disks:
9340 if disk.dev_type == constants.LD_FILE:
9341 raise errors.OpPrereqError("Export not supported for instances with"
9342 " file-based disks", errors.ECODE_INVAL)
9344 def _CleanupExports(self, feedback_fn):
9345 """Removes exports of current instance from all other nodes.
9347 If an instance in a cluster with nodes A..D was exported to node C, its
9348 exports will be removed from the nodes A, B and D.
9351 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9353 nodelist = self.cfg.GetNodeList()
9354 nodelist.remove(self.dst_node.name)
9356 # on one-node clusters nodelist will be empty after the removal
9357 # if we proceed the backup would be removed because OpQueryExports
9358 # substitutes an empty list with the full cluster node list.
9359 iname = self.instance.name
9361 feedback_fn("Removing old exports for instance %s" % iname)
9362 exportlist = self.rpc.call_export_list(nodelist)
9363 for node in exportlist:
9364 if exportlist[node].fail_msg:
9366 if iname in exportlist[node].payload:
9367 msg = self.rpc.call_export_remove(node, iname).fail_msg
9369 self.LogWarning("Could not remove older export for instance %s"
9370 " on node %s: %s", iname, node, msg)
9372 def Exec(self, feedback_fn):
9373 """Export an instance to an image in the cluster.
9376 assert self.op.mode in constants.EXPORT_MODES
9378 instance = self.instance
9379 src_node = instance.primary_node
9381 if self.op.shutdown:
9382 # shutdown the instance, but not the disks
9383 feedback_fn("Shutting down instance %s" % instance.name)
9384 result = self.rpc.call_instance_shutdown(src_node, instance,
9385 self.op.shutdown_timeout)
9386 # TODO: Maybe ignore failures if ignore_remove_failures is set
9387 result.Raise("Could not shutdown instance %s on"
9388 " node %s" % (instance.name, src_node))
9390 # set the disks ID correctly since call_instance_start needs the
9391 # correct drbd minor to create the symlinks
9392 for disk in instance.disks:
9393 self.cfg.SetDiskID(disk, src_node)
9395 activate_disks = (not instance.admin_up)
9398 # Activate the instance disks if we're exporting a stopped instance
9399 feedback_fn("Activating disks for %s" % instance.name)
9400 _StartInstanceDisks(self, instance, None)
9403 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9406 helper.CreateSnapshots()
9408 if (self.op.shutdown and instance.admin_up and
9409 not self.op.remove_instance):
9410 assert not activate_disks
9411 feedback_fn("Starting instance %s" % instance.name)
9412 result = self.rpc.call_instance_start(src_node, instance, None, None)
9413 msg = result.fail_msg
9415 feedback_fn("Failed to start instance: %s" % msg)
9416 _ShutdownInstanceDisks(self, instance)
9417 raise errors.OpExecError("Could not start instance: %s" % msg)
9419 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9420 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9421 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9422 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9423 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9425 (key_name, _, _) = self.x509_key_name
9428 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9431 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9432 key_name, dest_ca_pem,
9437 # Check for backwards compatibility
9438 assert len(dresults) == len(instance.disks)
9439 assert compat.all(isinstance(i, bool) for i in dresults), \
9440 "Not all results are boolean: %r" % dresults
9444 feedback_fn("Deactivating disks for %s" % instance.name)
9445 _ShutdownInstanceDisks(self, instance)
9447 # Remove instance if requested
9448 if self.op.remove_instance:
9449 if not (compat.all(dresults) and fin_resu):
9450 feedback_fn("Not removing instance %s as parts of the export failed" %
9453 feedback_fn("Removing instance %s" % instance.name)
9454 _RemoveInstance(self, feedback_fn, instance,
9455 self.op.ignore_remove_failures)
9457 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9458 self._CleanupExports(feedback_fn)
9460 return fin_resu, dresults
9463 class LURemoveExport(NoHooksLU):
9464 """Remove exports related to the named instance.
9467 _OP_REQP = [("instance_name", _TNonEmptyString)]
9470 def ExpandNames(self):
9471 self.needed_locks = {}
9472 # We need all nodes to be locked in order for RemoveExport to work, but we
9473 # don't need to lock the instance itself, as nothing will happen to it (and
9474 # we can remove exports also for a removed instance)
9475 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9477 def Exec(self, feedback_fn):
9478 """Remove any export.
9481 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9482 # If the instance was not found we'll try with the name that was passed in.
9483 # This will only work if it was an FQDN, though.
9484     fqdn_warn = False
9485     if not instance_name:
9486       fqdn_warn = True
9487       instance_name = self.op.instance_name
9489     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9490     exportlist = self.rpc.call_export_list(locked_nodes)
9491     found = False
9492     for node in exportlist:
9493       msg = exportlist[node].fail_msg
9494       if msg:
9495         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9496         continue
9497       if instance_name in exportlist[node].payload:
9498         found = True
9499         result = self.rpc.call_export_remove(node, instance_name)
9500         msg = result.fail_msg
9501         if msg:
9502           logging.error("Could not remove export for instance %s"
9503                         " on node %s: %s", instance_name, node, msg)
9505     if fqdn_warn and not found:
9506       feedback_fn("Export not found. If trying to remove an export belonging"
9507                   " to a deleted instance please use its Fully Qualified"
9508                   " Domain Name.")
9511 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9512   """Generic tags LU.
9514   This is an abstract class which is the parent of all the other tags LUs.
9516   """
9518 def ExpandNames(self):
9519 self.needed_locks = {}
9520 if self.op.kind == constants.TAG_NODE:
9521 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9522 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9523 elif self.op.kind == constants.TAG_INSTANCE:
9524 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9525 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9527 def CheckPrereq(self):
9528 """Check prerequisites.
9531 if self.op.kind == constants.TAG_CLUSTER:
9532 self.target = self.cfg.GetClusterInfo()
9533 elif self.op.kind == constants.TAG_NODE:
9534 self.target = self.cfg.GetNodeInfo(self.op.name)
9535 elif self.op.kind == constants.TAG_INSTANCE:
9536       self.target = self.cfg.GetInstanceInfo(self.op.name)
9537     else:
9538       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9539 str(self.op.kind), errors.ECODE_INVAL)
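# Illustrative sketch (hypothetical values): a tags opcode handled by these LUs
# carries a "kind" from constants.VALID_TAG_TYPES plus the object name, e.g.
#
#   kind = constants.TAG_INSTANCE
#   name = "instance1.example.com"
#   tags = ["env:prod", "owner:ops"]
#
# ExpandNames() above resolves the name and takes the matching node or instance
# lock, and CheckPrereq() sets self.target to the corresponding config object.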
9542 class LUGetTags(TagsLU):
9543 """Returns the tags of a given object.
9546   _OP_REQP = [
9547     ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9548     ("name", _TNonEmptyString),
9549     ]
9552 def Exec(self, feedback_fn):
9553 """Returns the tag list.
9556 return list(self.target.GetTags())
9559 class LUSearchTags(NoHooksLU):
9560 """Searches the tags for a given pattern.
9563 _OP_REQP = [("pattern", _TNonEmptyString)]
9566 def ExpandNames(self):
9567 self.needed_locks = {}
9569 def CheckPrereq(self):
9570 """Check prerequisites.
9572     This checks the pattern passed for validity by compiling it.
9574     """
9575     try:
9576       self.re = re.compile(self.op.pattern)
9577 except re.error, err:
9578 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9579 (self.op.pattern, err), errors.ECODE_INVAL)
9581 def Exec(self, feedback_fn):
9582     """Returns the tag list.
9584     """
9585     cfg = self.cfg
9586     tgts = [("/cluster", cfg.GetClusterInfo())]
9587     ilist = cfg.GetAllInstancesInfo().values()
9588     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9589     nlist = cfg.GetAllNodesInfo().values()
9590     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9591     results = []
9592     for path, target in tgts:
9593       for tag in target.GetTags():
9594         if self.re.search(tag):
9595           results.append((path, tag))
9597     return results
9599 class LUAddTags(TagsLU):
9600 """Sets a tag on a given object.
9603   _OP_REQP = [
9604     ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9605     ("name", _TNonEmptyString),
9606     ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9607     ]
9610 def CheckPrereq(self):
9611 """Check prerequisites.
9613 This checks the type and length of the tag name and value.
9616 TagsLU.CheckPrereq(self)
9617 for tag in self.op.tags:
9618 objects.TaggableObject.ValidateTag(tag)
9620   def Exec(self, feedback_fn):
9621     """Sets the tag.
9623     """
9624     try:
9625       for tag in self.op.tags:
9626         self.target.AddTag(tag)
9627 except errors.TagError, err:
9628 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9629 self.cfg.Update(self.target, feedback_fn)
9632 class LUDelTags(TagsLU):
9633 """Delete a list of tags from a given object.
9636   _OP_REQP = [
9637     ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9638     ("name", _TNonEmptyString),
9639     ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9640     ]
9643 def CheckPrereq(self):
9644 """Check prerequisites.
9646 This checks that we have the given tag.
9649 TagsLU.CheckPrereq(self)
9650 for tag in self.op.tags:
9651 objects.TaggableObject.ValidateTag(tag)
9652 del_tags = frozenset(self.op.tags)
9653 cur_tags = self.target.GetTags()
9654 if not del_tags <= cur_tags:
9655 diff_tags = del_tags - cur_tags
9656 diff_names = ["'%s'" % tag for tag in diff_tags]
9658 raise errors.OpPrereqError("Tag(s) %s not found" %
9659 (",".join(diff_names)), errors.ECODE_NOENT)
9661 def Exec(self, feedback_fn):
9662 """Remove the tag from the object.
9665 for tag in self.op.tags:
9666 self.target.RemoveTag(tag)
9667 self.cfg.Update(self.target, feedback_fn)
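# Illustrative sketch (hypothetical values): both LUAddTags and LUDelTags run
# every tag through objects.TaggableObject.ValidateTag, which is expected to
# raise errors.TagError for malformed tags, e.g.
#
#   objects.TaggableObject.ValidateTag("owner:ops")  # accepted
#   objects.TaggableObject.ValidateTag("")           # raises errors.TagError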
9670 class LUTestDelay(NoHooksLU):
9671 """Sleep for a specified amount of time.
9673   This LU sleeps on the master and/or nodes for a specified amount of
9674   time.
9676   """
9677   _OP_REQP = [
9678     ("duration", _TFloat),
9679     ("on_master", _TBool),
9680     ("on_nodes", _TListOf(_TNonEmptyString)),
9681     ("repeat", _TPositiveInt)
9682     ]
9688 def ExpandNames(self):
9689 """Expand names and set required locks.
9691 This expands the node list, if any.
9694 self.needed_locks = {}
9695 if self.op.on_nodes:
9696 # _GetWantedNodes can be used here, but is not always appropriate to use
9697       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9698       # more information.
9699 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9700 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9702 def _TestDelay(self):
9703 """Do the actual sleep.
9706 if self.op.on_master:
9707 if not utils.TestDelay(self.op.duration):
9708 raise errors.OpExecError("Error during master delay test")
9709 if self.op.on_nodes:
9710 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9711 for node, node_result in result.items():
9712 node_result.Raise("Failure during rpc call to node %s" % node)
9714 def Exec(self, feedback_fn):
9715 """Execute the test delay opcode, with the wanted repetitions.
9718     if self.op.repeat == 0:
9719       self._TestDelay()
9720     else:
9721       top_value = self.op.repeat - 1
9722       for i in range(self.op.repeat):
9723         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9724         self._TestDelay()
9727 class IAllocator(object):
9728 """IAllocator framework.
9730   An IAllocator instance has the following sets of attributes:
9731     - cfg that is needed to query the cluster
9732     - input data (all members of the _KEYS class attribute are required)
9733     - four buffer attributes (in|out_data|text), that represent the
9734       input (to the external script) in text and data structure format,
9735       and the output from it, again in two formats
9736     - the result variables from the script (success, info, nodes) for
9737       easy usage
9739   """
9740 # pylint: disable-msg=R0902
9741   # lots of instance attributes
9742   _ALLO_KEYS = [
9743     "name", "mem_size", "disks", "disk_template",
9744     "os", "tags", "nics", "vcpus", "hypervisor",
9745     ]
9746   _RELO_KEYS = [
9747     "name", "relocate_from",
9748     ]
9749   _EVAC_KEYS = [
9750     "evac_nodes",
9751     ]
9753   def __init__(self, cfg, rpc, mode, **kwargs):
9754     self.cfg = cfg
9755     self.rpc = rpc
9756     # init buffer variables
9757     self.in_text = self.out_text = self.in_data = self.out_data = None
9758     # init all input fields so that pylint is happy
9759     self.mode = mode
9760     self.mem_size = self.disks = self.disk_template = None
9761     self.os = self.tags = self.nics = self.vcpus = None
9762     self.hypervisor = None
9763     self.relocate_from = None
9764     self.name = None
9765     self.evac_nodes = None
9766     # computed fields
9767     self.required_nodes = None
9768     # init result fields
9769     self.success = self.info = self.result = None
9770 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9771 keyset = self._ALLO_KEYS
9772 fn = self._AddNewInstance
9773 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9774 keyset = self._RELO_KEYS
9775 fn = self._AddRelocateInstance
9776 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9777 keyset = self._EVAC_KEYS
9778 fn = self._AddEvacuateNodes
9779     else:
9780       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9781                                    " IAllocator" % self.mode)
9782     for key in kwargs:
9783       if key not in keyset:
9784         raise errors.ProgrammerError("Invalid input parameter '%s' to"
9785                                      " IAllocator" % key)
9786       setattr(self, key, kwargs[key])
9788     for key in keyset:
9789       if key not in kwargs:
9790         raise errors.ProgrammerError("Missing input parameter '%s' to"
9791                                      " IAllocator" % key)
9792     self._BuildInputData(fn)
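# Illustrative sketch (hypothetical values): callers construct an IAllocator
# with exactly the keyword arguments of the selected mode's keyset, e.g. for
# an allocation request:
#
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_ALLOC,
#                    name="instance1.example.com",
#                    mem_size=512,
#                    disks=[{"size": 1024, "mode": "w"}],
#                    disk_template=constants.DT_DRBD8,
#                    os="debian-image", tags=[], nics=[], vcpus=1,
#                    hypervisor=None)
#   ial.Run("hail")  # "hail" being one possible iallocator script name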
9794 def _ComputeClusterData(self):
9795 """Compute the generic allocator input data.
9797     This is the data that is independent of the actual operation.
9799     """
9800     cfg = self.cfg
9801     cluster_info = cfg.GetClusterInfo()
9802     # cluster data
9803     data = {
9804       "version": constants.IALLOCATOR_VERSION,
9805       "cluster_name": cfg.GetClusterName(),
9806       "cluster_tags": list(cluster_info.GetTags()),
9807       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9808       # we don't have job IDs
9809       }
9810     iinfo = cfg.GetAllInstancesInfo().values()
9811     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9813     # node data
9814     node_results = {}
9815     node_list = cfg.GetNodeList()
9817 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9818 hypervisor_name = self.hypervisor
9819 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9820 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9821 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9822 hypervisor_name = cluster_info.enabled_hypervisors[0]
9824     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9825                                         hypervisor_name)
9826     node_iinfo = \
9827       self.rpc.call_all_instances_info(node_list,
9828 cluster_info.enabled_hypervisors)
9829 for nname, nresult in node_data.items():
9830 # first fill in static (config-based) values
9831       ninfo = cfg.GetNodeInfo(nname)
9832       pnr = {
9833         "tags": list(ninfo.GetTags()),
9834         "primary_ip": ninfo.primary_ip,
9835         "secondary_ip": ninfo.secondary_ip,
9836         "offline": ninfo.offline,
9837         "drained": ninfo.drained,
9838         "master_candidate": ninfo.master_candidate,
9839         }
9841       if not (ninfo.offline or ninfo.drained):
9842         nresult.Raise("Can't get data for node %s" % nname)
9843         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9844                                 nname)
9845         remote_info = nresult.payload
9847 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9848 'vg_size', 'vg_free', 'cpu_total']:
9849 if attr not in remote_info:
9850 raise errors.OpExecError("Node '%s' didn't return attribute"
9851 " '%s'" % (nname, attr))
9852 if not isinstance(remote_info[attr], int):
9853           raise errors.OpExecError("Node '%s' returned invalid value"
9854                                    " for '%s': %s" %
9855                                    (nname, attr, remote_info[attr]))
9856 # compute memory used by primary instances
9857 i_p_mem = i_p_up_mem = 0
9858 for iinfo, beinfo in i_list:
9859 if iinfo.primary_node == nname:
9860 i_p_mem += beinfo[constants.BE_MEMORY]
9861             if iinfo.name not in node_iinfo[nname].payload:
9862               i_used_mem = 0
9863             else:
9864               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9865             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9866             remote_info['memory_free'] -= max(0, i_mem_diff)
9868             if iinfo.admin_up:
9869               i_p_up_mem += beinfo[constants.BE_MEMORY]
9871         # compute memory used by instances
9872         pnr_dyn = {
9873           "total_memory": remote_info['memory_total'],
9874           "reserved_memory": remote_info['memory_dom0'],
9875           "free_memory": remote_info['memory_free'],
9876           "total_disk": remote_info['vg_size'],
9877           "free_disk": remote_info['vg_free'],
9878           "total_cpus": remote_info['cpu_total'],
9879           "i_pri_memory": i_p_mem,
9880           "i_pri_up_memory": i_p_up_mem,
9881           }
9882         pnr.update(pnr_dyn)
9884       node_results[nname] = pnr
9885     data["nodes"] = node_results
9886     # instance data
9887     instance_data = {}
9889     for iinfo, beinfo in i_list:
9890       nic_data = []
9891       for nic in iinfo.nics:
9892         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9893         nic_dict = {"mac": nic.mac,
9894                     "ip": nic.ip,
9895                     "mode": filled_params[constants.NIC_MODE],
9896                     "link": filled_params[constants.NIC_LINK],
9897                    }
9898         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9899           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9900         nic_data.append(nic_dict)
9901       pir = {
9902         "tags": list(iinfo.GetTags()),
9903         "admin_up": iinfo.admin_up,
9904         "vcpus": beinfo[constants.BE_VCPUS],
9905         "memory": beinfo[constants.BE_MEMORY],
9906         "os": iinfo.os,
9907         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9908         "nics": nic_data,
9909         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9910         "disk_template": iinfo.disk_template,
9911         "hypervisor": iinfo.hypervisor,
9912         }
9913       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9914                                                  pir["disks"])
9915       instance_data[iinfo.name] = pir
9917     data["instances"] = instance_data
9919     self.in_data = data
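# Illustrative sketch (hypothetical values) of the structure assembled above:
#
#   self.in_data = {
#     "version": constants.IALLOCATOR_VERSION,
#     "cluster_name": "cluster.example.com",
#     "cluster_tags": [],
#     "enabled_hypervisors": ["xen-pvm"],
#     "nodes": {"node1.example.com": {"total_memory": 4096, ...}},
#     "instances": {"instance1.example.com": {"memory": 512, ...}},
#   }
#
# The mode-specific "request" entry is added later by _BuildInputData().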
9921 def _AddNewInstance(self):
9922 """Add new instance data to allocator structure.
9924 This in combination with _AllocatorGetClusterData will create the
9925 correct structure needed as input for the allocator.
9927 The checks for the completeness of the opcode must have already been
9931 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9933 if self.disk_template in constants.DTS_NET_MIRROR:
9934 self.required_nodes = 2
9936 self.required_nodes = 1
9939 "disk_template": self.disk_template,
9942 "vcpus": self.vcpus,
9943 "memory": self.mem_size,
9944 "disks": self.disks,
9945 "disk_space_total": disk_space,
9947 "required_nodes": self.required_nodes,
9951 def _AddRelocateInstance(self):
9952 """Add relocate instance data to allocator structure.
9954     This in combination with _ComputeClusterData will create the
9955     correct structure needed as input for the allocator.
9957     The checks for the completeness of the opcode must have already been
9958     done.
9960     """
9961     instance = self.cfg.GetInstanceInfo(self.name)
9962     if instance is None:
9963       raise errors.ProgrammerError("Unknown instance '%s' passed to"
9964                                    " IAllocator" % self.name)
9966     if instance.disk_template not in constants.DTS_NET_MIRROR:
9967       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9968                                  errors.ECODE_INVAL)
9970     if len(instance.secondary_nodes) != 1:
9971       raise errors.OpPrereqError("Instance has not exactly one secondary node",
9972                                  errors.ECODE_STATE)
9974     self.required_nodes = 1
9975     disk_sizes = [{'size': disk.size} for disk in instance.disks]
9976     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9978     request = {
9979       "name": self.name,
9980       "disk_space_total": disk_space,
9981       "required_nodes": self.required_nodes,
9982       "relocate_from": self.relocate_from,
9983       }
9984     return request
9986 def _AddEvacuateNodes(self):
9987     """Add evacuate nodes data to allocator structure.
9989     """
9990     request = {
9991       "evac_nodes": self.evac_nodes
9992       }
9993     return request
9995 def _BuildInputData(self, fn):
9996 """Build input data structures.
9999     self._ComputeClusterData()
10001     request = fn()
10002     request["type"] = self.mode
10003 self.in_data["request"] = request
10005 self.in_text = serializer.Dump(self.in_data)
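# Illustrative sketch (hypothetical values): for an allocation, the serialized
# request part of self.in_text looks roughly like
#
#   {"type": constants.IALLOCATOR_MODE_ALLOC, "name": "instance1.example.com",
#    "memory": 512, "disks": [{"size": 1024, "mode": "w"}],
#    "disk_space_total": <computed by _ComputeDiskSize>,
#    "required_nodes": 2, ...}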
10007 def Run(self, name, validate=True, call_fn=None):
10008 """Run an instance allocator and return the results.
10011 if call_fn is None:
10012 call_fn = self.rpc.call_iallocator_runner
10014 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10015 result.Raise("Failure while running the iallocator script")
10017     self.out_text = result.payload
10018     if validate:
10019       self._ValidateResult()
10021 def _ValidateResult(self):
10022 """Process the allocator results.
10024 This will process and if successful save the result in
10025 self.out_data and the other parameters.
10028     try:
10029       rdict = serializer.Load(self.out_text)
10030 except Exception, err:
10031 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10033 if not isinstance(rdict, dict):
10034 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10036     # TODO: remove backwards compatibility in later versions
10037     if "nodes" in rdict and "result" not in rdict:
10038       rdict["result"] = rdict["nodes"]
10039       del rdict["nodes"]
10041 for key in "success", "info", "result":
10042 if key not in rdict:
10043 raise errors.OpExecError("Can't parse iallocator results:"
10044 " missing key '%s'" % key)
10045 setattr(self, key, rdict[key])
10047 if not isinstance(rdict["result"], list):
10048       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10049                                " is not a list")
10050     self.out_data = rdict
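# Illustrative sketch (hypothetical values): a well-formed reply accepted by
# _ValidateResult() looks like
#
#   {"success": True, "info": "allocation successful",
#    "result": ["node2.example.com", "node3.example.com"]}
#
# Replies that only carry the legacy "nodes" key are rewritten to "result"
# above before the mandatory-key checks run.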
10053 class LUTestAllocator(NoHooksLU):
10054 """Run allocator tests.
10056 This LU runs the allocator tests
10060 ("direction", _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10061 ("mode", _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10062 ("name", _TNonEmptyString),
10063 ("nics", _TOr(_TNone, _TListOf(
10064 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10065 _TOr(_TNone, _TNonEmptyString))))),
10066 ("disks", _TOr(_TNone, _TList)),
10069 ("hypervisor", None),
10070 ("allocator", None),
10075 def CheckPrereq(self):
10076 """Check prerequisites.
10078     This checks the opcode parameters depending on the test direction and mode.
10081 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10082 for attr in ["mem_size", "disks", "disk_template",
10083 "os", "tags", "nics", "vcpus"]:
10084 if not hasattr(self.op, attr):
10085 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10086 attr, errors.ECODE_INVAL)
10087 iname = self.cfg.ExpandInstanceName(self.op.name)
10088 if iname is not None:
10089 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10090 iname, errors.ECODE_EXISTS)
10091 if not isinstance(self.op.nics, list):
10092 raise errors.OpPrereqError("Invalid parameter 'nics'",
10093 errors.ECODE_INVAL)
10094 if not isinstance(self.op.disks, list):
10095 raise errors.OpPrereqError("Invalid parameter 'disks'",
10096 errors.ECODE_INVAL)
10097 for row in self.op.disks:
10098 if (not isinstance(row, dict) or
10099 "size" not in row or
10100 not isinstance(row["size"], int) or
10101 "mode" not in row or
10102 row["mode"] not in ['r', 'w']):
10103 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10104 " parameter", errors.ECODE_INVAL)
10105 if self.op.hypervisor is None:
10106 self.op.hypervisor = self.cfg.GetHypervisorType()
10107 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10108 fname = _ExpandInstanceName(self.cfg, self.op.name)
10109 self.op.name = fname
10110 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10111 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10112 if not hasattr(self.op, "evac_nodes"):
10113 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10114 " opcode input", errors.ECODE_INVAL)
10115     else:
10116       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10117                                  self.op.mode, errors.ECODE_INVAL)
10119 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10120 if self.op.allocator is None:
10121 raise errors.OpPrereqError("Missing allocator name",
10122 errors.ECODE_INVAL)
10123 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10124 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10125 self.op.direction, errors.ECODE_INVAL)
10127 def Exec(self, feedback_fn):
10128 """Run the allocator test.
10131     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10132       ial = IAllocator(self.cfg, self.rpc,
10133                        mode=self.op.mode,
10134                        name=self.op.name,
10135                        mem_size=self.op.mem_size,
10136                        disks=self.op.disks,
10137                        disk_template=self.op.disk_template,
10138                        os=self.op.os,
10139                        tags=self.op.tags,
10140                        nics=self.op.nics,
10141                        vcpus=self.op.vcpus,
10142                        hypervisor=self.op.hypervisor,
10143                        )
10144     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10145       ial = IAllocator(self.cfg, self.rpc,
10146                        mode=self.op.mode,
10147                        name=self.op.name,
10148                        relocate_from=list(self.relocate_from),
10149                        )
10150     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10151       ial = IAllocator(self.cfg, self.rpc,
10152                        mode=self.op.mode,
10153                        evac_nodes=self.op.evac_nodes)
10154     else:
10155       raise errors.ProgrammerError("Uncaught mode %s in"
10156                                    " LUTestAllocator.Exec", self.op.mode)
10158     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10159       result = ial.in_text
10160     else:
10161       ial.Run(self.op.allocator, validate=False)
10162       result = ial.out_text
10164     return result